You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@impala.apache.org by mi...@apache.org on 2018/08/17 15:48:20 UTC

[1/4] impala git commit: IMPALA-7399: Remove third-party dependencies from junit xml script

Repository: impala
Updated Branches:
  refs/heads/master 7ccf73690 -> 199c0f2f9


IMPALA-7399: Remove third-party dependencies from junit xml script

The original patch for this Jira relied on a third party python lib
for generating Junit XML output, That proved to be limiting because
setting up the necessary virtualenv across a variety of dev and test
scenarios (private dev environment, jenkins.impala.io, and others)
proved to be confusing and messy.

This update to the script maintains the same functionality and the
same interface, but uses only the python standard library. A symlink
has also been added to Impala/bin for convenience.

Change-Id: I958ee0d8420b6a4197aaf0a7e0538a566332ea97
Reviewed-on: http://gerrit.cloudera.org:8080/11235
Reviewed-by: David Knupp <dk...@cloudera.com>
Tested-by: David Knupp <dk...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0286142b
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0286142b
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0286142b

Branch: refs/heads/master
Commit: 0286142b67c1ed9175da271457b65e642bb4178d
Parents: 7ccf736
Author: David Knupp <dk...@cloudera.com>
Authored: Tue Aug 14 11:13:19 2018 -0700
Committer: David Knupp <dk...@cloudera.com>
Committed: Thu Aug 16 21:59:52 2018 +0000

----------------------------------------------------------------------
 bin/generate_junitxml.py                        |   1 +
 bin/rat_exclude_files.txt                       |   2 +
 lib/python/README.md                            |   9 +
 .../impala_py_lib/jenkins/generate_junitxml.py  | 201 ++++++++++++++-----
 lib/python/requirements.txt                     |   4 +-
 5 files changed, 160 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0286142b/bin/generate_junitxml.py
----------------------------------------------------------------------
diff --git a/bin/generate_junitxml.py b/bin/generate_junitxml.py
new file mode 120000
index 0000000..060cac5
--- /dev/null
+++ b/bin/generate_junitxml.py
@@ -0,0 +1 @@
+../lib/python/impala_py_lib/jenkins/generate_junitxml.py
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/impala/blob/0286142b/bin/rat_exclude_files.txt
----------------------------------------------------------------------
diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt
index 980cfba..10f36af 100644
--- a/bin/rat_exclude_files.txt
+++ b/bin/rat_exclude_files.txt
@@ -10,6 +10,7 @@
 */rat_exclude_files.txt
 be/src/testutil/htpasswd
 be/src/testutil/*.key
+bin/generate_junitxml.py
 tests/*/__init__.py
 testdata/common/__init__.py
 fe/src/test/resources/regionservers
@@ -85,6 +86,7 @@ tests/comparison/POSTGRES.txt
 docs/README.md
 docker/README.md
 be/src/thirdparty/pcg-cpp-0.98/README.md
+lib/python/README.md
 
 # http://www.apache.org/legal/src-headers.html: "Test data for which the addition of a
 # source header would cause the tests to fail."

http://git-wip-us.apache.org/repos/asf/impala/blob/0286142b/lib/python/README.md
----------------------------------------------------------------------
diff --git a/lib/python/README.md b/lib/python/README.md
new file mode 100644
index 0000000..fec1de0
--- /dev/null
+++ b/lib/python/README.md
@@ -0,0 +1,9 @@
+# Impala Python Library
+
+A future home for locally pip installable python packages for Impala.
+
+# Installation
+
+```
+$ pip install -e $IMPALA_HOME/lib/python
+```

http://git-wip-us.apache.org/repos/asf/impala/blob/0286142b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
----------------------------------------------------------------------
diff --git a/lib/python/impala_py_lib/jenkins/generate_junitxml.py b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
old mode 100644
new mode 100755
index ff93aa4..68bedd1
--- a/lib/python/impala_py_lib/jenkins/generate_junitxml.py
+++ b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
@@ -24,55 +24,157 @@ easier triaging of build and setup errors.
 import argparse
 import errno
 import os
-import pytz
 import textwrap
+from xml.dom import minidom
+from xml.etree import ElementTree as ET
 
 from datetime import datetime as dt
-from junit_xml import TestSuite, TestCase
 
 IMPALA_HOME = os.getenv('IMPALA_HOME', '.')
+SCRIPT_NAME, _ = os.path.splitext(os.path.basename(__file__))
 
 
-def get_xml_content(file_or_string=None):
-  """
-  Derive content for the XML report.
-
-  Args:
-    file_or_string: a path to a file, or a plain string
-
-  If the supplied parameter is the path to a file, the contents will be inserted
-  into the XML report. If the parameter is just plain string, use that as the
-  content for the report.
-  """
-  if file_or_string is None:
-    content = ''
-  elif os.path.exists(file_or_string):
-    with open(file_or_string, 'r') as f:
-      content = f.read()
-  else:
-      content = file_or_string
-  return content
-
+class JunitReport(object):
+  """A Junit XML style report parseable by Jenkins for reporting build status.
 
-def generate_xml_file(testsuite, junitxml_logdir='.'):
-  """
-  Create a timestamped XML report file.
-
-  Args:
-    testsuite: junit_xml.TestSuite object
-    junitxml_logdir: path to directory where the file will be created
+  Generally, a caller who invokes this script doesn't need to do anything
+  more than supply the necessary command line parameters. The JunitReport
+  class is instantiated using those initial inputs, and a timestamped XML
+  file is output to the $IMPALA_HOME/logs/extra_junit_xml_logs/.
 
-  Return:
-    junit_log_file: path to the generated file
+  Log files are timestamped, so they will not overwrite previous files containing
+  output of the same step.
   """
-  ts_string = testsuite.timestamp.strftime('%Y%m%d_%H_%M_%S')
-  junit_log_file = os.path.join(junitxml_logdir,
-                                '{}.{}.xml'.format(testsuite.name, ts_string))
 
-  with open(junit_log_file, 'w') as f:
-    TestSuite.to_file(f, [testsuite], prettyprint=True)
+  def __init__(self, phase, step, error_msg=None, stdout=None, stderr=None,
+               elapsed_time=0):
+
+    self.root_element = None
+    self.testsuite_element = None
+    self.testcase_element = None
+
+    self.phase = phase
+    self.step = step
+    self.error_msg = error_msg
+    self.stdout = stdout
+    self.stderr = stderr
+    self.elapsed_time = elapsed_time
+    self.utc_time = dt.utcnow()
+
+    self.create_root_element()
+    self.add_testsuite_element()
+    self.add_testcase_element()
+
+    if self.error_msg is not None:
+      self.set_error()
+
+    if self.stdout is not None:
+      self.add_output('out', self.stdout)
+
+    if self.stderr is not None:
+      self.add_output('err', self.stderr)
+
+  def create_root_element(self):
+    """Create the testsuites root element."""
+    self.root_element = ET.Element("testsuites")
+    self.root_element.set("time", "{0:.1f}".format(float(self.elapsed_time)))
+    self.root_element.set("tests", "1")
+    self.root_element.set("failures", "0")
+    self.root_element.set("errors", "0")
+
+  def add_testsuite_element(self):
+    """Create the testsuite element."""
+    self.testsuite_element = ET.SubElement(self.root_element, "testsuite")
+    self.testsuite_element.set(
+      "name", "{}.{}.{}".format(SCRIPT_NAME, self.phase, self.step))
+    self.testsuite_element.set(
+      "timestamp", "{}+00:00".format(self.utc_time.strftime('%Y-%m-%d %H:%M:%S')))
+    self.testsuite_element.set("disabled", "0")
+    self.testsuite_element.set("errors", "0")
+    self.testsuite_element.set("failures", "0")
+    self.testsuite_element.set("skipped", "0")
+    self.testsuite_element.set("tests", "1")
+    self.testsuite_element.set("time", "0")
+    self.testsuite_element.set("file", "None")
+    self.testsuite_element.set("log", "None")
+    self.testsuite_element.set("url", "None")
+
+  def add_testcase_element(self):
+    """Create the testcase element."""
+    self.testcase_element = ET.SubElement(self.testsuite_element, "testcase")
+    self.testcase_element.set("classname", "{}.{}".format(SCRIPT_NAME, self.phase))
+    self.testcase_element.set("name", self.step)
+
+  def set_error(self):
+    """Set an error msg if the step failed, and increment necessary error attributes."""
+    error = ET.SubElement(self.testcase_element, "error")
+    error.set("message", self.error_msg)
+    error.set("type", "error")
+    self.testsuite_element.set("errors", "1")
+    self.root_element.set("errors", "1")
+
+  def add_output(self, output_type, file_or_string):
+    """
+    Add stdout or stderr content to testcase element.
+
+    Args:
+      output_type: [string] either out or err
+      file_or_string: a path to a file containing the content, or a plain string
+    """
+    output = ET.SubElement(self.testcase_element, "system-{}".format(output_type))
+    output.text = JunitReport.get_xml_content(file_or_string)
+
+  def to_file(self, junitxml_logdir='.'):
+    """
+    Create a timestamped XML report file.
+
+    Args:
+      junitxml_logdir: path to directory where the file will be created
+
+    Return:
+      junit_log_file: path to the generated file
+    """
+    filename = '{}.{}.xml'.format(
+        self.testsuite_element.attrib['name'],
+        self.utc_time.strftime('%Y%m%d_%H_%M_%S')
+    )
+    junit_log_file = os.path.join(junitxml_logdir, filename)
+
+    with open(junit_log_file, 'w') as f:
+      f.write(str(self))
+
+    return junit_log_file
+
+  @staticmethod
+  def get_xml_content(file_or_string=None):
+    """
+    Derive additional content for the XML report.
+
+    If the supplied parameter is the path to a file, the contents will be inserted
+    into the XML report. If the parameter is just plain string, use that as the
+    content for the report.
+
+    Args:
+      file_or_string: a path to a file, or a plain string
+
+    Returns:
+      content as a string
+    """
+    if file_or_string is None:
+      content = ''
+    elif os.path.exists(file_or_string):
+      with open(file_or_string, 'r') as f:
+        content = f.read()
+    else:
+      content = file_or_string
+    return content
 
-  return junit_log_file
+  def __str__(self):
+    """
+    Generate and return a pretty-printable XML string.
+    """
+    root_node_str = minidom.parseString(ET.tostring(self.root_element))
+    return root_node_str.toprettyxml(indent=' ' * 4)
 
 
 def get_options():
@@ -114,7 +216,7 @@ def get_options():
 
 def main():
   """
-  Create a "testcase" for each invocation of the script, and output the results
+  Create a report for each invocation of the script, and output the results
   of the test case to an XML file within $IMPALA_HOME/logs/extra_junit_xml_logs.
   The log file name will use "phase" and "step" values provided on the command
   line to structure the report. The XML report filename will follow the form:
@@ -136,24 +238,15 @@ def main():
       raise
 
   options = get_options()
-  root_name, _ = os.path.splitext(os.path.basename(__file__))
-
-  tc = TestCase(classname='{}.{}'.format(root_name, options.phase),
-                name=options.step,
-                elapsed_sec=options.time,
-                stdout=get_xml_content(options.stdout),
-                stderr=get_xml_content(options.stderr))
-
-  # Specifying an error message for any step causes the buid to be marked as invalid.
-  if options.error:
-    tc.add_error_info(get_xml_content(options.error))
-    assert tc.is_error()
 
-  testsuite = TestSuite(name='{}.{}.{}'.format(root_name, options.phase, options.step),
-                        timestamp=dt.utcnow().replace(tzinfo=pytz.UTC),
-                        test_cases=[tc])
+  junit_report = JunitReport(phase=options.phase,
+                             step=options.step,
+                             error_msg=options.error,
+                             stdout=options.stdout,
+                             stderr=options.stderr,
+                             elapsed_time=options.time)
 
-  xml_report = generate_xml_file(testsuite, junitxml_logdir)
+  xml_report = junit_report.to_file(junitxml_logdir)
   print("Generated: {}".format(xml_report))
 
 

http://git-wip-us.apache.org/repos/asf/impala/blob/0286142b/lib/python/requirements.txt
----------------------------------------------------------------------
diff --git a/lib/python/requirements.txt b/lib/python/requirements.txt
index 1995eb0..67be0c2 100644
--- a/lib/python/requirements.txt
+++ b/lib/python/requirements.txt
@@ -15,6 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.
 
-junit-xml==1.8
-pytz==2018.5
-six==1.11.0
+# List thirdparty python dependencies for impala_py_lib here
\ No newline at end of file

[4/4] impala git commit: IMPALA-7140 fix: fix NDV calculation for partition columns in LocalFsTable

Posted by mi...@apache.org.

IMPALA-7140 fix: fix NDV calculation for partition columns in LocalFsTable

For partition columns, we were previously calculating the NDV by adding
the count of partitions plus the count of null-partitions. However, all
of the null partitions have the same value (NULL) so they only
contribute one distinct value, regardless of how many there might be.

This fixes an existing test case in test_compute_stats so doesn't
include new tests.

Change-Id: I0a86750d52dcd744ace030a3c1ec70510d054acf
Reviewed-on: http://gerrit.cloudera.org:8080/11230
Reviewed-by: Vuk Ercegovac <ve...@cloudera.com>
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/199c0f2f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/199c0f2f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/199c0f2f

Branch: refs/heads/master
Commit: 199c0f2f9f5ef96e232c53579a21009242ea866c
Parents: 7c0450d
Author: Todd Lipcon <to...@apache.org>
Authored: Tue Aug 14 16:38:43 2018 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Aug 17 00:54:41 2018 +0000

----------------------------------------------------------------------
 .../main/java/org/apache/impala/catalog/local/LocalFsTable.java   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/199c0f2f/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
index 7743949..f19adfd 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalFsTable.java
@@ -520,7 +520,8 @@ public class LocalFsTable extends LocalTable implements FeFsTable {
       ColumnStats stats = getColumns().get(i).getStats();
       int nonNullParts = partitionValueMap_.get(i).size();
       int nullParts = nullPartitionIds_.get(i).size();
-      stats.setNumDistinctValues(nonNullParts + nullParts);
+      stats.setNumDistinctValues(nonNullParts + (nullParts > 0 ? 1 : 0));
+
       // TODO(todd): this calculation ends up setting the num_nulls stat
       // to the number of partitions with null rows, not the number of rows.
       // However, it maintains the existing behavior from HdfsTable.

[3/4] impala git commit: IMPALA-7439. CREATE DATABASE creates a catalog entry with missing location

Posted by mi...@apache.org.

IMPALA-7439. CREATE DATABASE creates a catalog entry with missing location

Previously, when creating a new database, the CatalogOpExecutor would
create an HMS Database object, issue the HMS createDatabase call, and
then create a Catalog entry from that same Database object. The
resulting Catalog entry would be missing certain fields that are
auto-created by the HMS itself, most importantly the location field.

The code for CTAS seems to have contained a workaround for this issue
ever since catalogd was first introduced: rather than using the location
stored in the Db object, it would re-fetch the Database from HMS.

Now that this is fixed, that workaround could be removed and some code
simplified.

A new test verifies that a newly-created database has the appropriate
location, and existing CTAS tests verify that functionality didn't
regress.

Change-Id: I13df31cee1e5768b073e0e35c4c16ebf1892be23
Reviewed-on: http://gerrit.cloudera.org:8080/11229
Reviewed-by: Vuk Ercegovac <ve...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/7c0450d6
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/7c0450d6
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/7c0450d6

Branch: refs/heads/master
Commit: 7c0450d6d4b43ab920d4abf97ee918eeca3e6fcf
Parents: a8b32db
Author: Todd Lipcon <to...@apache.org>
Authored: Tue Aug 14 01:43:33 2018 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Aug 17 00:54:28 2018 +0000

----------------------------------------------------------------------
 .../impala/analysis/CreateTableAsSelectStmt.java | 17 ++++++++++++-----
 .../org/apache/impala/catalog/FeCatalog.java     | 14 --------------
 .../apache/impala/catalog/ImpaladCatalog.java    | 17 -----------------
 .../impala/catalog/local/LocalCatalog.java       | 14 --------------
 .../apache/impala/service/CatalogOpExecutor.java |  5 +++++
 .../queries/QueryTest/create-database.test       | 19 +++++++++++++++++++
 6 files changed, 36 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/7c0450d6/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
index 4753b62..9bfd6d2 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableAsSelectStmt.java
@@ -21,14 +21,14 @@ import java.util.Collections;
 import java.util.EnumSet;
 import java.util.List;
 
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.impala.authorization.Privilege;
-import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.FeDb;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeKuduTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.HdfsFileFormat;
-import org.apache.impala.catalog.HdfsTable;
 import org.apache.impala.catalog.KuduTable;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.AnalysisException;
@@ -207,9 +207,11 @@ public class CreateTableAsSelectStmt extends StatementBase {
         CatalogOpExecutor.createMetaStoreTable(createStmt_.toThrift());
 
     try {
-      // Set a valid location of this table using the same rules as the metastore. If the
-      // user specified a location for the table this will be a no-op.
-      msTbl.getSd().setLocation(analyzer.getCatalog().getTablePath(msTbl).toString());
+      // Set a valid location of this table using the same rules as the metastore, unless
+      // the user specified a path.
+      if (msTbl.getSd().getLocation() == null || msTbl.getSd().getLocation().isEmpty()) {
+        msTbl.getSd().setLocation(getPathForNewTable(db, msTbl));
+      }
 
       FeTable tmpTable = null;
       if (KuduTable.isKuduTable(msTbl)) {
@@ -231,6 +233,11 @@ public class CreateTableAsSelectStmt extends StatementBase {
     insertStmt_.analyze(analyzer);
   }
 
+  private static String getPathForNewTable(FeDb db, Table msTbl) {
+    String dbLocation = db.getMetaStoreDb().getLocationUri();
+    return new Path(dbLocation, msTbl.getTableName().toLowerCase()).toString();
+  }
+
   @Override
   public List<Expr> getResultExprs() { return insertStmt_.getResultExprs(); }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/7c0450d6/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java b/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
index c98f6fc..1a09a99 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FeCatalog.java
@@ -19,15 +19,12 @@ package org.apache.impala.catalog;
 import java.util.List;
 import java.util.Set;
 
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.common.InternalException;
 import org.apache.impala.thrift.TCatalogObject;
 import org.apache.impala.thrift.TPartitionKeyValue;
 import org.apache.impala.thrift.TUniqueId;
 import org.apache.impala.util.PatternMatcher;
-import org.apache.thrift.TException;
 
 /**
  * Interface between the front-end (analysis and planning) classes and the Catalog.
@@ -82,17 +79,6 @@ public interface FeCatalog {
   void waitForCatalogUpdate(long timeoutMs);
 
   /**
-   * Returns the FS path where the metastore would create the given table. If the table
-   * has a "location" set, that will be returned. Otherwise the path will be resolved
-   * based on the location of the parent database. The metastore folder hierarchy is:
-   * <warehouse directory>/<db name>.db/<table name>
-   * Except for items in the default database which will be:
-   * <warehouse directory>/<table name>
-   * This method handles both of these cases.
-   */
-  public Path getTablePath(Table msTbl) throws TException;
-
-  /**
    * @return the ID of the catalog service from which this catalog most recently
    * loaded.
    */

http://git-wip-us.apache.org/repos/asf/impala/blob/7c0450d6/fe/src/main/java/org/apache/impala/catalog/ImpaladCatalog.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/ImpaladCatalog.java b/fe/src/main/java/org/apache/impala/catalog/ImpaladCatalog.java
index a259bf0..0f2afe3 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ImpaladCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ImpaladCatalog.java
@@ -231,23 +231,6 @@ public class ImpaladCatalog extends Catalog implements FeCatalog {
     }
   }
 
-
-  @Override // FeCatalog
-  public Path getTablePath(org.apache.hadoop.hive.metastore.api.Table msTbl)
-      throws TException {
-    try (MetaStoreClient msClient = getMetaStoreClient()) {
-      // If the table did not have its path set, build the path based on the the
-      // location property of the parent database.
-      if (msTbl.getSd().getLocation() == null || msTbl.getSd().getLocation().isEmpty()) {
-        String dbLocation =
-            msClient.getHiveClient().getDatabase(msTbl.getDbName()).getLocationUri();
-        return new Path(dbLocation, msTbl.getTableName().toLowerCase());
-      } else {
-        return new Path(msTbl.getSd().getLocation());
-      }
-    }
-  }
-
   /**
    *  Adds the given TCatalogObject to the catalog cache. The update may be ignored
    *  (considered out of date) if:

http://git-wip-us.apache.org/repos/asf/impala/blob/7c0450d6/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java b/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
index 681d852..37b25b4 100644
--- a/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/local/LocalCatalog.java
@@ -21,8 +21,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.catalog.AuthorizationPolicy;
 import org.apache.impala.catalog.BuiltinsDb;
@@ -205,18 +203,6 @@ public class LocalCatalog implements FeCatalog {
   }
 
   @Override
-  public Path getTablePath(Table msTbl) {
-    // If the table did not have its path set, build the path based on the
-    // location property of the parent database.
-    if (msTbl.getSd().getLocation() == null || msTbl.getSd().getLocation().isEmpty()) {
-      String dbLocation = getDb(msTbl.getDbName()).getMetaStoreDb().getLocationUri();
-      return new Path(dbLocation, msTbl.getTableName().toLowerCase());
-    } else {
-      return new Path(msTbl.getSd().getLocation());
-    }
-  }
-
-  @Override
   public TUniqueId getCatalogServiceId() {
     throw new UnsupportedOperationException("TODO");
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/7c0450d6/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
index cdf85ed..20d47bf 100644
--- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java
@@ -1009,6 +1009,11 @@ public class CatalogOpExecutor {
       try (MetaStoreClient msClient = catalog_.getMetaStoreClient()) {
         try {
           msClient.getHiveClient().createDatabase(db);
+          // Load the database back from the HMS. It's unfortunate we need two
+          // RPCs here, but otherwise we can't populate the location field of the
+          // DB properly. We'll take the slight chance of a race over the incorrect
+          // behavior of showing no location in 'describe database' (IMPALA-7439).
+          db = msClient.getHiveClient().getDatabase(dbName);
           newDb = catalog_.addDb(dbName, db);
           addSummary(resp, "Database has been created.");
         } catch (AlreadyExistsException e) {

http://git-wip-us.apache.org/repos/asf/impala/blob/7c0450d6/testdata/workloads/functional-query/queries/QueryTest/create-database.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/create-database.test b/testdata/workloads/functional-query/queries/QueryTest/create-database.test
index 765bca8..433c50f 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/create-database.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/create-database.test
@@ -12,6 +12,25 @@ show databases like "$DATABASE_2"
 STRING, STRING
 ====
 ---- QUERY
+# Test that DESCRIBE shows the proper database location
+# for a newly created database (regression test for IMPALA-7439)
+describe database $DATABASE_2
+---- RESULTS
+'$DATABASE_2','$NAMENODE/test-warehouse/$DATABASE_2.db','For testing'
+---- TYPES
+string, string, string
+====
+---- QUERY
+# Test that DESCRIBE EXTENDED also has all of the necessary info.
+describe database extended $DATABASE_2
+---- RESULTS
+'$DATABASE_2','$NAMENODE/test-warehouse/$DATABASE_2.db','For testing'
+'Owner: ','',''
+'','$USER','USER'
+---- TYPES
+string, string, string
+====
+---- QUERY
 # Make sure creating a database with the same name doesn't throw an error when
 # IF NOT EXISTS is specified.
 create database if not exists $DATABASE_2

[2/4] impala git commit: IMPALA-7412: width_bucket() function overflows too easily

Posted by mi...@apache.org.

IMPALA-7412: width_bucket() function overflows too easily

Running the tests of https://gerrit.cloudera.org/#/c/10859/
it turned out that the width_bucket() function overflows
very often.

A common problem is that the function tries to cast the
'num_buckets' parameter to the decimal determined by the
Frontend. When the Frontend determined the precision and
scale of this decimal it only considered the decimal
arguments and ignored everything else. Therefore the
determined precision and scale is often not suitable for
the 'num_buckets' parameter.

WidthBucketImpl() has three decimal arguments, all of them
have the same byte size, precision, and scale. So it is
possible to interpret them as plain integers and still
calculate the proper bucket.

I included the python test cases from IMPALA-7202 developed
by Taras Bobrovytsky.

For performance test I used the following query:

SELECT sum(width_bucket(cast(l_orderkey AS DECIMAL(30, 10)),
           0, 5500000, 1000000))
FROM tpch_parquet.lineitem;

The new implementation executed it in ~0.3 seconds.
The old implementation executed it in ~0.8 seconds.

Change-Id: I728cc05d9aef8d081e6f2da66146f6d7b75dbb57
Reviewed-on: http://gerrit.cloudera.org:8080/11160
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/a8b32dba
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/a8b32dba
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/a8b32dba

Branch: refs/heads/master
Commit: a8b32dbafa1f015c8316f205e32bbdce349f2474
Parents: 0286142
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Wed Aug 8 17:17:37 2018 +0200
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Aug 16 22:12:55 2018 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc             |  29 ++----
 be/src/exprs/math-functions-ir.cc     | 156 +++++++++++------------------
 be/src/util/bit-util.h                |  53 +++++++++-
 tests/query_test/test_decimal_fuzz.py |  63 +++++++++++-
 4 files changed, 176 insertions(+), 125 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/a8b32dba/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index d0e3ff3..8cc0e9d 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -5348,16 +5348,11 @@ TEST_F(ExprTest, MathFunctions) {
   // Test when min > max
   TestErrorString("width_bucket(22, 50, 5, 4)",
       "UDF ERROR: Lower bound cannot be greater than or equal to the upper bound\n");
-  // Test max - min will overflow during width_bucket evaluation
-  TestErrorString("width_bucket(11, -9, 99999999999999999999999999999999999999, 4000)",
-      "UDF ERROR: Overflow while evaluating the difference between min_range: -9 and "
-      "max_range: 99999999999999999999999999999999999999\n");
-  // If expr - min overflows during width_bucket evaluation, max - min will also
-  // overflow. Since we evaluate max - min before evaluating expr - min, we will never
-  // end up overflowing expr - min.
-  TestErrorString("width_bucket(1, -99999999999999999999999999999999999999, 9, 40)",
-      "UDF ERROR: Overflow while evaluating the difference between min_range: "
-      "-99999999999999999999999999999999999999 and max_range: 9\n");
+  // IMPALA-7412: Test max - min should not overflow anymore
+  TestValue("width_bucket(11, -9, 99999999999999999999999999999999999999, 4000)",
+      TYPE_BIGINT, 1);
+  TestValue("width_bucket(1, -99999999999999999999999999999999999999, 9, 40)",
+      TYPE_BIGINT, 40);
   // Test when dist_from_min * buckets cannot be stored in a int128_t (overflows)
   // and needs to be stored in a int256_t
   TestValue("width_bucket(8000000000000000000000000000000000000,"
@@ -5380,16 +5375,12 @@ TEST_F(ExprTest, MathFunctions) {
   // max and min value that would require int256_t for evalation
   TestValue("width_bucket(10000000000000000000000000000000000000, 1,"
             "99999999999999999999999999999999999999, 15)", TYPE_BIGINT, 2);
-  // IMPALA-7242/IMPALA-7243: check for overflow when converting IntVal to DecimalValue
-  TestErrorString("width_bucket(cast(-0.10 as decimal(37,30)), cast(-0.36028797018963968 "
+  // IMPALA-7412: These should not overflow anymore
+  TestValue("width_bucket(cast(-0.10 as decimal(37,30)), cast(-0.36028797018963968 "
       "as decimal(25,25)), cast(9151517.4969773200562764155787276999832"
-      "as decimal(38,31)), 1328180220)",
-      "UDF ERROR: Overflow while representing the num_buckets:1328180220 as a "
-      "DecimalVal\n");
-  TestErrorString("width_bucket(cast(9 as decimal(10,7)), cast(-60000 as decimal(11,6)), "
-      "cast(10 as decimal(7,5)), 249895273);",
-      "UDF ERROR: Overflow while representing the num_buckets:249895273 as a "
-      "DecimalVal\n");
+      "as decimal(38,31)), 1328180220)", TYPE_BIGINT, 38);
+  TestValue("width_bucket(cast(9 as decimal(10,7)), cast(-60000 as decimal(11,6)), "
+      "cast(10 as decimal(7,5)), 249895273);", TYPE_BIGINT, 249891109);
   // Run twice to test deterministic behavior.
   for (uint32_t seed : {0, 1234}) {
     stringstream rand, random;

http://git-wip-us.apache.org/repos/asf/impala/blob/a8b32dba/be/src/exprs/math-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/math-functions-ir.cc b/be/src/exprs/math-functions-ir.cc
index 28c9e1e..7103a10 100644
--- a/be/src/exprs/math-functions-ir.cc
+++ b/be/src/exprs/math-functions-ir.cc
@@ -457,57 +457,32 @@ DoubleVal MathFunctions::FmodDouble(FunctionContext* ctx, const DoubleVal& a,
 
 // The bucket_number is evaluated using the following formula:
 //
-//   bucket_number = dist_from_min * num_buckets / range_size
+//   bucket_number = dist_from_min * num_buckets / range_size + 1
 //      where -
 //        dist_from_min = expr - min_range
 //        range_size = max_range - min_range
-//        buckets = number of buckets
+//        num_buckets = number of buckets
 //
-// The results of the above subtractions are stored in Decimal16Value to avoid an overflow
-// in the following cases:
-//   case 1:
-//      T1 is decimal8Value
-//         When evaluating this particular expression
-//            dist_from_min = expr - min_range
-//         If expr is a max positive value which can be represented in decimal8Value and
-//         min_range < 0 the resulting dist_from_min can be represented in decimal16Val
-//         without overflowing
-//   case 2:
-//      T1 is decimal16Value
-//         Subtracting a negative min_range from expr can result in an overflow in which
-//         case the function errors out. There is no decimal32Val to handle this. So
-//         storing it in decimal16Value.
-//   case 3:
-//      T1 is decimal4Value
-//         We can store the results in a decimal8Value. But this change hard codes to
-//         store the result in decimal16Val for now to be compatible with the other
-//         decimal*Vals
+// Since expr, min_range, and max_range are all decimals with the same
+// byte size, precision, and scale we can interpret them as plain integers
+// and still calculate the proper bucket.
 //
-// The result of this multiplication dist_from_min * buckets is stored as a int256_t
-// if storing it in a int128_t would overflow.
+// There are some possibilities of overflowing during the calculation:
+// range_size = max_range - min_range
+// dist_from_min = expr - min_range
+// dist_from_min * num_buckets
 //
-// To perform the division, range_size is scaled up. The scale and precision of the
-// numerator and denominator are adjusted to be the same. This avoids the need to compute
-// the resulting scale and precision.
+// For all the above cases we use a bigger integer type provided by the
+// BitUtil::DoubleWidth<> metafunction.
 template <class  T1>
 BigIntVal MathFunctions::WidthBucketImpl(FunctionContext* ctx,
     const T1& expr, const T1& min_range,
     const T1& max_range, const IntVal& num_buckets) {
-  // FE casts expr, min_range and max_range to be of the scale and precision
-  int input_scale = ctx->impl()->GetConstFnAttr(FunctionContextImpl::ARG_TYPE_SCALE, 1);
-  int input_precision = ctx->impl()->GetConstFnAttr(
-      FunctionContextImpl::ARG_TYPE_PRECISION, 1);
-
-  bool overflow = false;
-  Decimal16Value range_size = max_range.template Subtract<int128_t>(input_scale,
-      min_range, input_scale, input_precision, input_scale, false, &overflow);
-  if (UNLIKELY(overflow)) {
-    ostringstream error_msg;
-    error_msg << "Overflow while evaluating the difference between min_range: " <<
-        min_range.value() << " and max_range: " << max_range.value();
-    ctx->SetError(error_msg.str().c_str());
-    return BigIntVal::null();
-  }
+  auto expr_val = expr.value();
+  using ActualType = decltype(expr_val);
+  auto min_range_val = min_range.value();
+  auto max_range_val = max_range.value();
+  auto num_buckets_val = static_cast<ActualType>(num_buckets.val);
 
   if (UNLIKELY(num_buckets.val <= 0)) {
     ostringstream error_msg;
@@ -516,73 +491,58 @@ BigIntVal MathFunctions::WidthBucketImpl(FunctionContext* ctx,
     return BigIntVal::null();
   }
 
-  if (UNLIKELY(min_range >= max_range)) {
+  if (UNLIKELY(min_range_val >= max_range_val)) {
     ctx->SetError("Lower bound cannot be greater than or equal to the upper bound");
     return BigIntVal::null();
   }
 
-  if (UNLIKELY(expr < min_range)) return 0;
-
-  if (UNLIKELY(expr >= max_range)) {
-    BigIntVal result;
-    result.val = num_buckets.val;
-    ++result.val;
-    return result;
+  if (expr_val < min_range_val) return 0;
+  if (expr_val >= max_range_val) {
+    return BigIntVal(static_cast<int64_t>(num_buckets.val) + 1);
   }
 
-  Decimal16Value dist_from_min = expr.template Subtract<int128_t>(input_scale,
-      min_range, input_scale, input_precision, input_scale, false, &overflow);
-  DCHECK_EQ(overflow, false);
-
-  Decimal16Value buckets = Decimal16Value::FromInt(input_precision, input_scale,
-      num_buckets.val, &overflow);
-
-  if (UNLIKELY(overflow)) {
-    stringstream error_msg;
-    error_msg << "Overflow while representing the num_buckets:" << num_buckets.val
-        << " as a DecimalVal";
-    ctx->SetError(error_msg.str().c_str());
-    return BigIntVal::null();
-  }
-  bool needs_int256 = false;
-  // Check if dist_from_min * buckets would overflow and if there is a need to
-  // store the intermediate results in int256_t to avoid an overflows
-  // Check if scaling up range size overflows and if there is a need to store the
-  // intermediate results in int256_t to avoid the overflow
-  if (UNLIKELY(BitUtil::CountLeadingZeros(abs(buckets.value())) +
-      BitUtil::CountLeadingZeros(abs(dist_from_min.value())) <= 128 ||
-      BitUtil::CountLeadingZeros(range_size.value()) +
-      detail::MinLeadingZerosAfterScaling(BitUtil::CountLeadingZeros(
-      range_size.value()), input_scale) <= 128)) {
-    needs_int256 = true;
+  bool bigger_type_needed = false;
+  // It is likely that this if stmt can be evaluated during codegen because
+  // 'max_range' and 'min_range' are almost certainly constant expressions:
+  if (max_range_val >= 0 && min_range_val < 0) {
+    if (static_cast<UnsignedType<ActualType>>(max_range_val) +
+        static_cast<UnsignedType<ActualType>>(abs(min_range_val)) >=
+        static_cast<UnsignedType<ActualType>>(BitUtil::Max<ActualType>())) {
+      bigger_type_needed = true;
+    }
   }
 
-  int128_t result;
-  if (needs_int256) {
-    // resulting scale should be 2 * input_scale as per multiplication rules
-    int256_t x = ConvertToInt256(buckets.value()) * ConvertToInt256(
-        dist_from_min.value());
-
-    // Since "range_size" and "x" have different scales, the divison would require
-    // evaluating the resulting scale. To avoid this we scale up the denominator to
-    // match the scale of the numerator.
-    int256_t y = DecimalUtil::MultiplyByScale<int256_t>(ConvertToInt256(
-        range_size.value()), input_scale, false);
-    result = ConvertToInt128(x / y, DecimalUtil::MAX_UNSCALED_DECIMAL16,
-        &overflow);
-    DCHECK_EQ(overflow, false);
+  auto MultiplicationOverflows = [](ActualType lhs, ActualType rhs) {
+    DCHECK(lhs > 0 && rhs > 0);
+    using ActualType = decltype(lhs);
+    return BitUtil::CountLeadingZeros(lhs) + BitUtil::CountLeadingZeros(rhs) <=
+        BitUtil::UnsignedWidth<ActualType>() + 1;
+  };
+
+  // It is likely that this can be evaluated during codegen:
+  bool multiplication_can_overflow = bigger_type_needed ||  MultiplicationOverflows(
+      max_range_val - min_range_val, num_buckets_val);
+  // 'expr_val' is not likely to be a constant expression, so this almost certainly
+  // needs runtime evaluation if bigger_type_needed is false:
+  bigger_type_needed = multiplication_can_overflow &&  MultiplicationOverflows(
+      expr_val - min_range_val, num_buckets_val);
+
+  auto BucketFunc = [](auto element, auto min_rng, auto max_rng, auto buckets) {
+    auto range_size = max_rng - min_rng;
+    auto dist_from_min = element - min_rng;
+    auto ret = dist_from_min * buckets / range_size;
+    return BigIntVal(static_cast<int64_t>(ret) + 1);
+  };
+
+  if (bigger_type_needed) {
+    using BiggerType = typename DoubleWidth<ActualType>::type;
+
+    return BucketFunc(static_cast<BiggerType>(expr_val),
+        static_cast<BiggerType>(min_range_val), static_cast<BiggerType>(max_range_val),
+        static_cast<BiggerType>(num_buckets.val));
   } else {
-    // resulting scale should be 2 * input_scale as per multiplication rules
-    int128_t x = buckets.value() * dist_from_min.value();
-
-    // Since "range_size" and "x" have different scales, the divison would require
-    // evaluating the resulting scale. To avoid this we scale up the denominator to
-    // match the scale of the numerator.
-    int128_t y = DecimalUtil::MultiplyByScale<int128_t>(range_size.value(),
-        input_scale, false);
-    result = x / y; // NOLINT: clang-tidy thinks y may equal zero here.
+    return BucketFunc(expr_val, min_range_val, max_range_val, num_buckets.val);
   }
-  return (BigIntVal(abs(result) + 1));
 }
 
 BigIntVal MathFunctions::WidthBucket(FunctionContext* ctx, const DecimalVal& expr,

http://git-wip-us.apache.org/repos/asf/impala/blob/a8b32dba/be/src/util/bit-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/bit-util.h b/be/src/util/bit-util.h
index 8a65509..d07dd9d 100644
--- a/be/src/util/bit-util.h
+++ b/be/src/util/bit-util.h
@@ -28,8 +28,7 @@
 #include <climits>
 #include <limits>
 #include <typeinfo>
-
-#include <boost/type_traits/make_unsigned.hpp>
+#include <type_traits>
 
 #include "common/compiler-util.h"
 #include "gutil/bits.h"
@@ -39,7 +38,40 @@
 
 namespace impala {
 
-using boost::make_unsigned;
+/// Nested 'type' corresponds to the unsigned version of T.
+template <typename T>
+struct MakeUnsigned {
+  using type = std::make_unsigned_t<T>;
+};
+
+template <>
+struct MakeUnsigned<int128_t> {
+  using type = __uint128_t;
+};
+
+template <typename T>
+using UnsignedType = typename MakeUnsigned<T>::type;
+
+// Doubles the width of integer types (e.g. int32_t -> int64_t).
+// Currently only works with a few signed types.
+// Feel free to extend it to other types as well.
+template <typename T>
+struct DoubleWidth {};
+
+template <>
+struct DoubleWidth<int32_t> {
+  using type = int64_t;
+};
+
+template <>
+struct DoubleWidth<int64_t> {
+  using type = int128_t;
+};
+
+template <>
+struct DoubleWidth<int128_t> {
+  using type = int256_t;
+};
 
 /// Utility class to do standard bit tricks
 /// TODO: is this in boost or something else like that?
@@ -59,6 +91,17 @@ class BitUtil {
         std::is_same<CVR_REMOVED, __int128>::value ? 127 : -1;
   }
 
+  /// Returns the max value that can be represented in T.
+  template<typename T, typename CVR_REMOVED = typename std::decay<T>::type,
+      typename std::enable_if<std::is_integral<CVR_REMOVED> {}||
+                              std::is_same<CVR_REMOVED, __int128> {}, int>::type = 0>
+  constexpr static inline CVR_REMOVED Max() {
+    return std::is_integral<CVR_REMOVED>::value ?
+        std::numeric_limits<CVR_REMOVED>::max() :
+        std::is_same<CVR_REMOVED, __int128>::value ?
+            static_cast<UnsignedType<CVR_REMOVED>>(-1) / 2 : -1;
+  }
+
   /// Return an integer signifying the sign of the value, returning +1 for
   /// positive integers (and zero), -1 for negative integers.
   /// The extra shift is to silence GCC warnings about full width shift on
@@ -168,7 +211,7 @@ class BitUtil {
   template<typename T>
   static inline int PopcountSigned(T v) {
     // Converting to same-width unsigned then extending preserves the bit pattern.
-    return BitUtil::Popcount(static_cast<typename make_unsigned<T>::type>(v));
+    return BitUtil::Popcount(static_cast<UnsignedType<T>>(v));
   }
 
   /// Returns the 'num_bits' least-significant bits of 'v'.
@@ -249,7 +292,7 @@ class BitUtil {
   template <typename T>
   constexpr static T ShiftRightLogical(T v, int shift) {
     // Conversion to unsigned ensures most significant bits always filled with 0's
-    return static_cast<typename make_unsigned<T>::type>(v) >> shift;
+    return static_cast<UnsignedType<T>>(v) >> shift;
   }
 
   /// Get an specific bit of a numeric type

http://git-wip-us.apache.org/repos/asf/impala/blob/a8b32dba/tests/query_test/test_decimal_fuzz.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_decimal_fuzz.py b/tests/query_test/test_decimal_fuzz.py
index a129e33..1433ec3 100644
--- a/tests/query_test/test_decimal_fuzz.py
+++ b/tests/query_test/test_decimal_fuzz.py
@@ -30,6 +30,9 @@ from tests.common.test_vector import ImpalaTestDimension, ImpalaTestMatrix
 
 class TestDecimalFuzz(ImpalaTestSuite):
 
+  # Impala's max precision for decimals is 38, so we should have the same in the tests
+  decimal.getcontext().prec = 38
+
   @classmethod
   def get_workload(cls):
     return 'functional-query'
@@ -200,7 +203,7 @@ class TestDecimalFuzz(ImpalaTestSuite):
         return True
     return False
 
-  def execute_one(self):
+  def execute_one_decimal_op(self):
     '''Executes a single query and compares the result to a result that we computed in
     Python.'''
     op = random.choice(['+', '-', '*', '/', '%'])
@@ -243,6 +246,60 @@ class TestDecimalFuzz(ImpalaTestSuite):
         expected_result = None
       assert self.result_equals(expected_result, result)
 
-  def test_fuzz(self, vector):
+  def test_decimal_ops(self, vector):
+    for _ in xrange(self.iterations):
+      self.execute_one_decimal_op()
+
+  def width_bucket(self, val, min_range, max_range, num_buckets):
+    # Multiplying the values by 10**40 guarantees that the numbers can be converted
+    # to int without losing information.
+    val_int = int(decimal.Decimal(val) * 10**40)
+    min_range_int = int(decimal.Decimal(min_range) * 10**40)
+    max_range_int = int(decimal.Decimal(max_range) * 10**40)
+
+    if min_range_int >= max_range_int:
+      return None
+    if val_int < min_range_int:
+      return 0
+    if val_int > max_range_int:
+      return num_buckets + 1
+
+    range_size = max_range_int - min_range_int
+    dist_from_min = val_int - min_range_int
+    return (num_buckets * dist_from_min) / range_size + 1
+
+  def execute_one_width_bucket(self):
+    val, val_prec, val_scale = self.get_decimal()
+    min_range, min_range_prec, min_range_scale = self.get_decimal()
+    max_range, max_range_prec, max_range_scale = self.get_decimal()
+    num_buckets = random.randint(1, 2147483647)
+
+    query = ('select width_bucket('
+        'cast({val} as decimal({val_prec},{val_scale})), '
+        'cast({min_range} as decimal({min_range_prec},{min_range_scale})), '
+        'cast({max_range} as decimal({max_range_prec},{max_range_scale})), '
+        '{num_buckets})')
+
+    query = query.format(val=val, val_prec=val_prec, val_scale=val_scale,
+        min_range=min_range, min_range_prec=min_range_prec,
+        min_range_scale=min_range_scale,
+        max_range=max_range, max_range_prec=max_range_prec,
+        max_range_scale=max_range_scale,
+        num_buckets=num_buckets)
+
+    expected_result = self.width_bucket(val, min_range, max_range, num_buckets)
+    if not expected_result:
+      return
+
+    try:
+      result = self.execute_scalar(query, query_options={'decimal_v2': 'true'})
+      assert int(result) == expected_result
+    except ImpalaBeeswaxException as e:
+      if "You need to wrap the arguments in a CAST" not in str(e):
+        # Sometimes the decimal inputs are incompatible with each other, so it's ok
+        # to ignore this error.
+        raise e
+
+  def test_width_bucket(self, vector):
     for _ in xrange(self.iterations):
-      self.execute_one()
+      self.execute_one_width_bucket()