You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ph...@apache.org on 2018/01/23 00:42:44 UTC

[1/5] impala git commit: IMPALA-5654: Disallow setting Kudu table name in CREATE TABLE

Repository: impala
Updated Branches:
  refs/heads/2.x 4afabd4e3 -> 1bb3547e4


IMPALA-5654: Disallow setting Kudu table name in CREATE TABLE

This change disallows explicitly setting the Kudu table name property
for managed Kudu tables in a CREATE TABLE statement. The Kudu table
name property gets a generated value as the following:
'impala::db_name.table_name' where table_name is the one given in
the CREATE TABLE statement.
Providing the Kudu table name property when creating a managed Kudu
table results in an error without creating the table. E.g.:
CREATE TABLE t (i INT) STORED AS KUDU
  TBLPROPERTIES('kudu.table_name'='some_name');

Alongside the CREATE TABLE statement also the ALTER TABLE statement
is changed not to allow the modification of Kudu.table_name of
managed Kudu tables.

Change-Id: Ieca037498abf8f5fde67b77e824b720482cdbe6f
Reviewed-on: http://gerrit.cloudera.org:8080/8820
Reviewed-by: Dimitris Tsirogiannis <dt...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1bb3547e
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1bb3547e
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1bb3547e

Branch: refs/heads/2.x
Commit: 1bb3547e4e5791435a3c1a3014cd7d4296c17073
Parents: b008b77
Author: Gabor Kaszab <ga...@cloudera.com>
Authored: Tue Dec 12 09:46:21 2017 +0100
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Mon Jan 22 16:41:16 2018 -0800

----------------------------------------------------------------------
 .../analysis/AlterTableSetTblProperties.java    | 36 ++++++---
 .../apache/impala/analysis/CreateTableStmt.java | 34 ++++++--
 .../org/apache/impala/analysis/TableDef.java    |  9 +++
 .../apache/impala/analysis/AnalyzeDDLTest.java  | 22 ++++--
 .../org/apache/impala/analysis/ToSqlTest.java   |  6 +-
 .../queries/QueryTest/kudu_alter.test           | 73 ++++++++---------
 tests/query_test/test_kudu.py                   | 83 ++++++++++----------
 7 files changed, 157 insertions(+), 106 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
index fb83f24..732f5f6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
+++ b/fe/src/main/java/org/apache/impala/analysis/AlterTableSetTblProperties.java
@@ -91,17 +91,7 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt {
           hive_metastoreConstants.META_TABLE_STORAGE));
     }
 
-    if (getTargetTable() instanceof KuduTable && analyzer.getAuthzConfig().isEnabled()) {
-      // Checking for 'EXTERNAL' is case-insensitive, see IMPALA-5637.
-      boolean setsExternal =
-          MetaStoreUtil.findTblPropKeyCaseInsensitive(tblProperties_, "EXTERNAL") != null;
-      if (setsExternal || tblProperties_.containsKey(KuduTable.KEY_MASTER_HOSTS)) {
-        String authzServer = analyzer.getAuthzConfig().getServerName();
-        Preconditions.checkNotNull(authzServer);
-        analyzer.registerPrivReq(new PrivilegeRequestBuilder().onServer(
-            authzServer).all().toRequest());
-      }
-    }
+    if (getTargetTable() instanceof KuduTable) analyzeKuduTable(analyzer);
 
     // Check avro schema when it is set in avro.schema.url or avro.schema.literal to
     // avoid potential metadata corruption (see IMPALA-2042).
@@ -121,6 +111,30 @@ public class AlterTableSetTblProperties extends AlterTableSetStmt {
     analyzeSortColumns(getTargetTable(), tblProperties_);
   }
 
+  private void analyzeKuduTable(Analyzer analyzer) throws AnalysisException {
+    // Checking for 'EXTERNAL' is case-insensitive, see IMPALA-5637.
+    String keyForExternalProperty =
+        MetaStoreUtil.findTblPropKeyCaseInsensitive(tblProperties_, "EXTERNAL");
+
+    // Throw error if kudu.table_name is provided for managed Kudu tables
+    // TODO IMPALA-6375: Allow setting kudu.table_name for managed Kudu tables
+    // if the 'EXTERNAL' property is set to TRUE in the same step.
+    if (!Table.isExternalTable(table_.getMetaStoreTable())) {
+      AnalysisUtils.throwIfNotNull(tblProperties_.get(KuduTable.KEY_TABLE_NAME),
+          String.format("Not allowed to set '%s' manually for managed Kudu tables .",
+              KuduTable.KEY_TABLE_NAME));
+    }
+    if (analyzer.getAuthzConfig().isEnabled()) {
+      if (keyForExternalProperty != null ||
+          tblProperties_.containsKey(KuduTable.KEY_MASTER_HOSTS)) {
+        String authzServer = analyzer.getAuthzConfig().getServerName();
+        Preconditions.checkNotNull(authzServer);
+        analyzer.registerPrivReq(new PrivilegeRequestBuilder().onServer(
+            authzServer).all().toRequest());
+      }
+    }
+  }
+
   /**
    * Check that Avro schema provided in avro.schema.url or avro.schema.literal is valid
    * Json and contains only supported Impala types. If both properties are set, then

http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
index 5810a40..b4d59e8 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
@@ -41,6 +41,7 @@ import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.google.common.primitives.Ints;
+import com.google.common.collect.Maps;
 
 /**
  * Represents a CREATE TABLE statement.
@@ -111,6 +112,12 @@ public class CreateTableStmt extends StatementBase {
   Map<String, String> getSerdeProperties() { return tableDef_.getSerdeProperties(); }
   public THdfsFileFormat getFileFormat() { return tableDef_.getFileFormat(); }
   RowFormat getRowFormat() { return tableDef_.getRowFormat(); }
+  private String getGeneratedKuduTableName() {
+    return tableDef_.getGeneratedKuduTableName();
+  }
+  private void setGeneratedKuduTableName(String tableName) {
+    tableDef_.setGeneratedKuduTableName(tableName);
+  }
 
   // Only exposed for ToSqlUtils. Returns the list of primary keys declared by the user
   // at the table level. Note that primary keys may also be declared in column
@@ -158,7 +165,11 @@ public class CreateTableStmt extends StatementBase {
     params.setFile_format(getFileFormat());
     params.setIf_not_exists(getIfNotExists());
     params.setSort_columns(getSortColumns());
-    params.setTable_properties(getTblProperties());
+    params.setTable_properties(Maps.newHashMap(getTblProperties()));
+    if (!getGeneratedKuduTableName().isEmpty()) {
+      params.getTable_properties().put(KuduTable.KEY_TABLE_NAME,
+          getGeneratedKuduTableName());
+    }
     params.setSerde_properties(getSerdeProperties());
     for (KuduPartitionParam d: getKuduPartitionParams()) {
       params.addToPartition_by(d.toThrift());
@@ -294,13 +305,8 @@ public class CreateTableStmt extends StatementBase {
    * Analyzes and checks parameters specified for managed Kudu tables.
    */
   private void analyzeManagedKuduTableParams(Analyzer analyzer) throws AnalysisException {
-    // If no Kudu table name is specified in tblproperties, generate one using the
-    // current database as a prefix to avoid conflicts in Kudu.
-    // TODO: Disallow setting this manually for managed tables (IMPALA-5654).
-    if (!getTblProperties().containsKey(KuduTable.KEY_TABLE_NAME)) {
-      getTblProperties().put(KuduTable.KEY_TABLE_NAME,
-          KuduUtil.getDefaultCreateKuduTableName(getDb(), getTbl()));
-    }
+    analyzeManagedKuduTableName();
+
     // Check column types are valid Kudu types
     for (ColumnDef col: getColumnDefs()) {
       try {
@@ -338,6 +344,18 @@ public class CreateTableStmt extends StatementBase {
   }
 
   /**
+   * Generates a Kudu table name based on the target database and table and stores
+   * it in TableDef.generatedKuduTableName_. Throws if the Kudu table
+   * name was given manually via TBLPROPERTIES.
+   */
+  private void analyzeManagedKuduTableName() throws AnalysisException {
+    AnalysisUtils.throwIfNotNull(getTblProperties().get(KuduTable.KEY_TABLE_NAME),
+        String.format("Not allowed to set '%s' manually for managed Kudu tables .",
+            KuduTable.KEY_TABLE_NAME));
+    setGeneratedKuduTableName(KuduUtil.getDefaultCreateKuduTableName(getDb(), getTbl()));
+  }
+
+  /**
    * Analyzes the partitioning schemes specified in the CREATE TABLE statement.
    */
   private void analyzeKuduPartitionParams(Analyzer analyzer) throws AnalysisException {

http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/fe/src/main/java/org/apache/impala/analysis/TableDef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDef.java b/fe/src/main/java/org/apache/impala/analysis/TableDef.java
index 178a976..915bbba 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableDef.java
@@ -88,6 +88,9 @@ class TableDef {
   // True if analyze() has been called.
   private boolean isAnalyzed_ = false;
 
+  //Kudu table name generated during analysis for managed Kudu tables
+  private String generatedKuduTableName_ = "";
+
   // END: Members that need to be reset()
   /////////////////////////////////////////
 
@@ -160,6 +163,7 @@ class TableDef {
     dataLayout_.reset();
     columnDefs_.clear();
     isAnalyzed_ = false;
+    generatedKuduTableName_ = "";
   }
 
   public TableName getTblName() {
@@ -183,6 +187,11 @@ class TableDef {
   List<ColumnDef> getPrimaryKeyColumnDefs() { return primaryKeyColDefs_; }
   boolean isExternal() { return isExternal_; }
   boolean getIfNotExists() { return ifNotExists_; }
+  String getGeneratedKuduTableName() { return generatedKuduTableName_; }
+  void setGeneratedKuduTableName(String tableName) {
+    Preconditions.checkNotNull(tableName);
+    generatedKuduTableName_ = tableName;
+  }
   List<KuduPartitionParam> getKuduPartitionParams() {
     return dataLayout_.getKuduPartitionParams();
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 72b7f58..895d8c5 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2079,9 +2079,16 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "Cannot change the type of a Kudu column using an ALTER TABLE CHANGE COLUMN " +
         "statement: (INT vs BIGINT)");
 
-    // Rename the underlying Kudu table
-    AnalyzesOk("ALTER TABLE functional_kudu.testtbl SET " +
-        "TBLPROPERTIES ('kudu.table_name' = 'Hans')");
+    // Rename the underlying Kudu table is not supported for managed Kudu tables
+    AnalysisError("ALTER TABLE functional_kudu.testtbl SET " +
+        "TBLPROPERTIES ('kudu.table_name' = 'Hans')",
+        "Not allowed to set 'kudu.table_name' manually for managed Kudu tables");
+
+    // TODO IMPALA-6375: Allow setting kudu.table_name for managed Kudu tables
+    // if the 'EXTERNAL' property is set to TRUE in the same step.
+    AnalysisError("ALTER TABLE functional_kudu.testtbl SET " +
+        "TBLPROPERTIES ('EXTERNAL' = 'TRUE','kudu.table_name' = 'Hans')",
+        "Not allowed to set 'kudu.table_name' manually for managed Kudu tables");
 
     // ALTER TABLE RENAME TO
     AnalyzesOk("ALTER TABLE functional_kudu.testtbl RENAME TO new_testtbl");
@@ -2242,11 +2249,14 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "partition by range(unknown_column) (partition value = 'abc') stored as kudu",
         "Column 'unknown_column' in 'RANGE (unknown_column) (PARTITION VALUE = 'abc')' " +
         "is not a key column. Only key columns can be used in PARTITION BY");
-    // Kudu table name is specified in tblproperties
+    // Kudu num_tablet_replicas is specified in tblproperties
     AnalyzesOk("create table tab (x int primary key) partition by hash (x) " +
-        "partitions 8 stored as kudu tblproperties ('kudu.table_name'='tab_1'," +
-        "'kudu.num_tablet_replicas'='1'," +
+        "partitions 8 stored as kudu tblproperties ('kudu.num_tablet_replicas'='1'," +
         "'kudu.master_addresses' = '127.0.0.1:8080, 127.0.0.1:8081')");
+    // Kudu table name is specified in tblproperties resulting in an error
+    AnalysisError("create table tab (x int primary key) partition by hash (x) " +
+        "partitions 8 stored as kudu tblproperties ('kudu.table_name'='tab')",
+        "Not allowed to set 'kudu.table_name' manually for managed Kudu tables");
     // No port is specified in kudu master address
     AnalyzesOk("create table tdata_no_port (id int primary key, name string, " +
         "valf float, vali bigint) partition by range(id) (partition values <= 10, " +

http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
index 6105925..bb6a39b 100644
--- a/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/ToSqlTest.java
@@ -92,12 +92,12 @@ public class ToSqlTest extends FrontendTestBase {
   }
 
   private void testToSql(String query, String defaultDb, String expected,
-      boolean ignore_whitespace) {
+      boolean ignoreWhitespace) {
     String actual = null;
     try {
       ParseNode node = AnalyzesOk(query, createAnalyzer(defaultDb));
       actual = node.toSql();
-      if (ignore_whitespace) {
+      if (ignoreWhitespace) {
         // Transform whitespace to single space.
         actual = actual.replace('\n', ' ').replaceAll(" +", " ").trim();
       }
@@ -315,7 +315,6 @@ public class ToSqlTest extends FrontendTestBase {
         "CREATE TABLE default.p ( a BIGINT PRIMARY KEY, b TIMESTAMP " +
         "DEFAULT '1987-05-19' ) PARTITION BY HASH (a) PARTITIONS 3 " +
         "STORED AS KUDU TBLPROPERTIES ('kudu.master_addresses'='foo', " +
-        "'kudu.table_name'='impala::default.p', " +
         "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')", true);
   }
 
@@ -348,7 +347,6 @@ public class ToSqlTest extends FrontendTestBase {
         "CREATE TABLE default.p PRIMARY KEY (a, b) PARTITION BY HASH (a) PARTITIONS 3, " +
         "RANGE (b) (PARTITION VALUE = 1) STORED AS KUDU TBLPROPERTIES " +
         "('kudu.master_addresses'='foo', " +
-        "'kudu.table_name'='impala::default.p', " +
         "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler') AS " +
         "SELECT int_col a, bigint_col b FROM functional.alltypes", true);
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
index 04f104f..305ccf1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
@@ -344,20 +344,6 @@ select id, last_name from tbl_to_alter
 INT,STRING
 ====
 ---- QUERY
-# Rename the underlying Kudu table
-alter table tbl_to_alter set tblproperties('kudu.table_name'='kudu_tbl_to_alter')
----- RESULTS
-'Updated table.'
-====
----- QUERY
-# Create a new table and try to rename to an existing kudu table
-create table copy_of_tbl (a int primary key) partition by hash (a) partitions 3
-  stored as kudu tblproperties('kudu.table_name'='copy_of_tbl');
-alter table copy_of_tbl set tblproperties('kudu.table_name'='kudu_tbl_to_alter')
----- CATCH
-ImpalaRuntimeException: Error renaming Kudu table copy_of_tbl to kudu_tbl_to_alter
-====
----- QUERY
 # Ensure the Kudu table is accessible
 select count(*) from tbl_to_alter
 ---- RESULTS
@@ -379,34 +365,49 @@ select count(*) from kudu_tbl_to_alter
 BIGINT
 ====
 ---- QUERY
-# Rename Kudu table and insert a number of rows
-alter table copy_of_tbl set tblproperties('kudu.table_name'='shared_kudu_tbl');
-insert into copy_of_tbl values (1), (2), (3)
+# Create an external Kudu table pointing to an existing Kudu table
+create external table external_tbl stored as kudu
+  tblproperties('kudu.table_name'='impala::$DATABASE.tbl_to_alter');
+select count(*) from external_tbl
+---- RESULTS
+5
+---- TYPES
+BIGINT
+====
+---- QUERY
+# Insert an item into the table pointed by the external Kudu table
+insert into kudu_tbl_to_alter (id, name, new_col1, new_col2)
+  values (7, 'test', 4, 400)
 ---- RUNTIME_PROFILE
-NumModifiedRows: 3
+NumModifiedRows: 1
 NumRowErrors: 0
 ---- LABELS
-A
----- DML_RESULTS: copy_of_tbl
-1
-2
-3
+ID, NAME, NEW_COL1, NEW_COL2, LAST_NAME, NEW_COL4
+---- DML_RESULTS: kudu_tbl_to_alter
+2,'test',1,100,'NULL',-1
+3,'test',10,1000,'NULL',-1
+4,'test',1,100,'NULL',NULL
+5,'test',2,200,'names',1
+6,'test',3,300,'NULL',-1
+7,'test',4,400,'NULL',-1
 ---- TYPES
-INT
+INT,STRING,INT,BIGINT,STRING,INT
 ====
 ---- QUERY
-# Create an external table
-create external table external_tbl stored as kudu
-tblproperties('kudu.table_name'='kudu_tbl_to_alter');
-select count(*) from external_tbl
+# After an insert into the underlying table check if the row count of the
+# external table pointing to it also increased.
+select count(*) from external_tbl;
 ---- RESULTS
-5
+6
 ---- TYPES
 BIGINT
 ====
 ---- QUERY
 # Change the external table to point to a different Kudu table
-alter table external_tbl set tblproperties('kudu.table_name'='shared_kudu_tbl');
+create table temp_kudu_table (i int primary key) stored as kudu;
+insert into temp_kudu_table values (1), (2), (3);
+alter table external_tbl set
+  tblproperties('kudu.table_name'='impala::$DATABASE.temp_kudu_table');
 select count(*) from external_tbl
 ---- RESULTS
 3
@@ -464,7 +465,7 @@ alter table kudu_tbl_to_alter alter column new_col1 set default 10 + 5;
 ---- QUERY
 # check that the above defaults are applied
 insert into kudu_tbl_to_alter (id, last_name, new_col4)
-values (7, 'test', 8)
+values (8, 'test', 8)
 ---- RUNTIME_PROFILE
 NumModifiedRows: 1
 NumRowErrors: 0
@@ -474,7 +475,8 @@ NumRowErrors: 0
 4,'test',1,100,'NULL',NULL
 5,'test',2,200,'names',1
 6,'test',3,300,'NULL',-1
-7,'name_default',15,1000,'test',8
+7,'test',4,400,'NULL',-1
+8,'name_default',15,1000,'test',8
 ---- LABELS
 ID,NAME,NEW_COL1,NEW_COL2,LAST_NAME,NEW_COL4
 ---- TYPES
@@ -530,7 +532,7 @@ STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING,STRING
 ---- QUERY
 # check that we can insert and scan after the storage attribute changes
 insert into kudu_tbl_to_alter (id, name, new_col1, new_col2, last_name, new_col4)
-values (8, 'nine', 10, 11, 'twelve', 13)
+values (9, 'nine', 10, 11, 'twelve', 13)
 ---- RUNTIME_PROFILE
 NumModifiedRows: 1
 NumRowErrors: 0
@@ -540,8 +542,9 @@ NumRowErrors: 0
 4,'test',1,100,'NULL',NULL
 5,'test',2,200,'names',1
 6,'test',3,300,'NULL',-1
-7,'name_default',15,1000,'test',8
-8,'nine',10,11,'twelve',13
+7,'test',4,400,'NULL',-1
+8,'name_default',15,1000,'test',8
+9,'nine',10,11,'twelve',13
 ---- LABELS
 ID,NAME,NEW_COL1,NEW_COL2,LAST_NAME,NEW_COL4
 ---- TYPES

http://git-wip-us.apache.org/repos/asf/impala/blob/1bb3547e/tests/query_test/test_kudu.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index f8a761a..ddf59f2 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -318,18 +318,6 @@ class TestKuduOperations(KuduTestSuite):
     cursor.execute("SELECT * FROM %s.foo" % (unique_database))
     assert cursor.fetchall() == [(0, )]
 
-  def test_kudu_rename_table(self, cursor, kudu_client, unique_database):
-    """Test Kudu table rename"""
-    cursor.execute("""CREATE TABLE %s.foo (a INT PRIMARY KEY) PARTITION BY HASH(a)
-        PARTITIONS 3 STORED AS KUDU""" % unique_database)
-    kudu_tbl_name = KuduTestSuite.to_kudu_table_name(unique_database, "foo")
-    assert kudu_client.table_exists(kudu_tbl_name)
-    new_kudu_tbl_name = "blah"
-    cursor.execute("ALTER TABLE %s.foo SET TBLPROPERTIES('kudu.table_name'='%s')" % (
-        unique_database, new_kudu_tbl_name))
-    assert kudu_client.table_exists(new_kudu_tbl_name)
-    assert not kudu_client.table_exists(kudu_tbl_name)
-
   def test_kudu_show_unbounded_range_partition(self, cursor, kudu_client,
                                                unique_database):
     """Check that a single unbounded range partition gets printed correctly."""
@@ -854,36 +842,48 @@ class TestShowCreateTable(KuduTestSuite):
       create_sql_fmt % ("2009-01-01 00:00:00.000000999"),
       show_create_sql_fmt % ("1230768000000001"))
 
-  def test_properties(self, cursor):
-    # If an explicit table name is used for the Kudu table and it differs from what
-    # would be the default Kudu table name, the name should be shown as a table property.
-    kudu_table = self.random_table_name()
-    props = "'kudu.table_name'='%s'" % kudu_table
-    self.assert_show_create_equals(cursor,
-        """
-        CREATE TABLE {{table}} (c INT PRIMARY KEY)
-        PARTITION BY HASH (c) PARTITIONS 3
-        STORED AS KUDU
-        TBLPROPERTIES ({props})""".format(props=props),
-        """
-        CREATE TABLE {db}.{{table}} (
-          c INT NOT NULL ENCODING AUTO_ENCODING COMPRESSION DEFAULT_COMPRESSION,
-          PRIMARY KEY (c)
-        )
-        PARTITION BY HASH (c) PARTITIONS 3
-        STORED AS KUDU
-        TBLPROPERTIES ('kudu.master_addresses'='{kudu_addr}', {props})""".format(
-            db=cursor.conn.db_name, kudu_addr=KUDU_MASTER_HOSTS, props=props))
+  def test_external_kudu_table_name_with_show_create(self, cursor, kudu_client,
+      unique_database):
+    """Check that the generated kudu.table_name tblproperty is present with
+       show create table with external Kudu tables.
+    """
+    schema_builder = SchemaBuilder()
+    column_spec = schema_builder.add_column("id", INT64)
+    column_spec.nullable(False)
+    schema_builder.set_primary_keys(["id"])
+    partitioning = Partitioning().set_range_partition_columns(["id"])
+    schema = schema_builder.build()
 
-    # If the name is explicitly given (or not given at all) so that the name is the same
-    # as the default name, the table name is not shown.
-    props = "'kudu.table_name'='impala::{db}.{table}'"
+    kudu_table_name = self.random_table_name()
+    try:
+      kudu_client.create_table(kudu_table_name, schema, partitioning)
+      kudu_table = kudu_client.table(kudu_table_name)
+
+      table_name_prop = "'kudu.table_name'='%s'" % kudu_table.name
+      self.assert_show_create_equals(cursor,
+          """
+          CREATE EXTERNAL TABLE {{table}} STORED AS KUDU
+          TBLPROPERTIES({props})""".format(
+              props=table_name_prop),
+          """
+          CREATE EXTERNAL TABLE {db}.{{table}}
+          STORED AS KUDU
+          TBLPROPERTIES ('kudu.master_addresses'='{kudu_addr}', {kudu_table})""".format(
+              db=cursor.conn.db_name, kudu_addr=KUDU_MASTER_HOSTS,
+              kudu_table=table_name_prop))
+    finally:
+      if kudu_client.table_exists(kudu_table_name):
+        kudu_client.delete_table(kudu_table_name)
+
+  def test_managed_kudu_table_name_with_show_create(self, cursor):
+    """Check that the generated kudu.table_name tblproperty is not present with
+       show create table with managed Kudu tables.
+    """
     self.assert_show_create_equals(cursor,
         """
-        CREATE TABLE {{table}} (c INT PRIMARY KEY)
+        CREATE TABLE {table} (c INT PRIMARY KEY)
         PARTITION BY HASH (c) PARTITIONS 3
-        STORED AS KUDU
-        TBLPROPERTIES ({props})""".format(props=props),
+        STORED AS KUDU""",
         """
         CREATE TABLE {db}.{{table}} (
           c INT NOT NULL ENCODING AUTO_ENCODING COMPRESSION DEFAULT_COMPRESSION,
@@ -894,7 +894,6 @@ class TestShowCreateTable(KuduTestSuite):
         TBLPROPERTIES ('kudu.master_addresses'='{kudu_addr}')""".format(
             db=cursor.conn.db_name, kudu_addr=KUDU_MASTER_HOSTS))
 
-
 class TestDropDb(KuduTestSuite):
 
   def test_drop_non_empty_db(self, unique_cursor, kudu_client):
@@ -932,9 +931,9 @@ class TestDropDb(KuduTestSuite):
       managed_table_name = self.random_table_name()
       unique_cursor.execute("""
           CREATE TABLE %s (a INT PRIMARY KEY) PARTITION BY HASH (a) PARTITIONS 3
-          STORED AS KUDU TBLPROPERTIES ('kudu.table_name' = '%s')"""
-          % (managed_table_name, managed_table_name))
-      assert kudu_client.table_exists(managed_table_name)
+          STORED AS KUDU""" % managed_table_name)
+      kudu_table_name = "impala::" + db_name + "." + managed_table_name
+      assert kudu_client.table_exists(kudu_table_name)
 
       # Create a table in HDFS
       hdfs_table_name = self.random_table_name()


[5/5] impala git commit: IMPALA-6420: Fix TestCharFormats for local filesystem tests

Posted by ph...@apache.org.
IMPALA-6420: Fix TestCharFormats for local filesystem tests

Tl;dr - running tests was covering up a bug in the tests,
or alternatively, not running tests exposes a bug.

Local tests were failing because we failed to prepend
the local filesystem prefix.  This was covered up because
the snapshot creation job runs tests, which actually creates
these tables in the metastore.  When running local filesystem
tests with a snapshot, the table creation never runs because
the snapshot import converts hdfs:// locations into local
file locations.  The problematic CREATE TABLE x IF NOT EXISTS
then never get run, hiding the problem.

Testing: With a local filesystem install, in Impala shell:
    drop table functional_parquet.chars_formats

    and then:
     tests/run-tests.py  query_test/test_chars.py -k char_format

Change-Id: I916b5566a3c5a83185e502b814ad43b5af14e87f
Reviewed-on: http://gerrit.cloudera.org:8080/9074
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/8f0ef09a
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/8f0ef09a
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/8f0ef09a

Branch: refs/heads/2.x
Commit: 8f0ef09a8a41e8073223abff189b91ce7aeff080
Parents: 38a9ab7
Author: Zachary Amsden <za...@cloudera.com>
Authored: Thu Jan 18 13:44:27 2018 -0800
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Mon Jan 22 16:41:16 2018 -0800

----------------------------------------------------------------------
 tests/query_test/test_chars.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/8f0ef09a/tests/query_test/test_chars.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_chars.py b/tests/query_test/test_chars.py
index eaa744a..b182b91 100644
--- a/tests/query_test/test_chars.py
+++ b/tests/query_test/test_chars.py
@@ -19,6 +19,7 @@ import pytest
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_dimensions import create_exec_option_dimension
+from tests.util.filesystem_utils import get_fs_path
 
 class TestStringQueries(ImpalaTestSuite):
   @classmethod
@@ -54,25 +55,25 @@ class TestCharFormats(ImpalaTestSuite):
         functional_parquet.chars_formats
         (cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
         STORED AS PARQUET
-        LOCATION "/test-warehouse/chars_formats_parquet"''')
+        LOCATION "{0}"'''.format(get_fs_path("/test-warehouse/chars_formats_parquet")))
     self.client.execute('''create external table if not exists
         functional.chars_formats
         (cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
         ROW FORMAT delimited fields terminated by ','  escaped by '\\\\'
         STORED AS TEXTFILE
-        LOCATION "/test-warehouse/chars_formats_text"
-        ''')
+        LOCATION "{0}"'''.format(get_fs_path("/test-warehouse/chars_formats_text")))
     self.client.execute('''create external table if not exists
         functional_avro_snap.chars_formats
         (cs CHAR(5), cl CHAR(140), vc VARCHAR(32))
         STORED AS AVRO
-        LOCATION "/test-warehouse/chars_formats_avro_snap"
-        TBLPROPERTIES ('avro.schema.literal'='{"type":"record",
+        LOCATION "{0}"
+        TBLPROPERTIES ('avro.schema.literal'='{{"type":"record",
         "name":"CharTypesTest","doc":"Schema generated by Kite",
         "fields":[
-        {"name":"cs","type":["null","string"], "doc":"Type inferred"},
-        {"name":"cl","type":["null","string"], "doc":"Type inferred"},
-        {"name":"vc","type":["null","string"],"doc":"Type inferred"}]}')''')
+        {{"name":"cs","type":["null","string"], "doc":"Type inferred"}},
+        {{"name":"cl","type":["null","string"], "doc":"Type inferred"}},
+        {{"name":"vc","type":["null","string"],"doc":"Type inferred"}}]}}')
+        '''.format(get_fs_path("/test-warehouse/chars_formats_avro_snap")))
 
   @classmethod
   def add_test_dimensions(cls):


[3/5] impala git commit: IMPALA-6422: Use ldexp() instead of powf() in HLL.

Posted by ph...@apache.org.
IMPALA-6422: Use ldexp() instead of powf() in HLL.

Using ldexp() to compute a floating point power of two is
over 10x faster than powf().

This change is particularly helpful for speeding up
COMPUTE STATS TABLESAMPLE which has many calls to
HllFinalEstimate() where floating point power of two
computations are relevant.

Testing:
- core/hdfs run passed

Change-Id: I517614d3f9cf1cf56b15a173c3cfe76e0f2e0382
Reviewed-on: http://gerrit.cloudera.org:8080/9078
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/38a9ab78
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/38a9ab78
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/38a9ab78

Branch: refs/heads/2.x
Commit: 38a9ab78b95e2229a3dc97f766833e9ef9d7330d
Parents: 4afabd4
Author: Alex Behm <al...@cloudera.com>
Authored: Thu Jan 18 19:06:30 2018 -0800
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Mon Jan 22 16:41:16 2018 -0800

----------------------------------------------------------------------
 be/src/exec/incr-stats-util.cc         |  3 +--
 be/src/exprs/aggregate-functions-ir.cc | 13 +++++--------
 be/src/exprs/aggregate-functions.h     |  6 +++---
 3 files changed, 9 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/38a9ab78/be/src/exec/incr-stats-util.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/incr-stats-util.cc b/be/src/exec/incr-stats-util.cc
index 6f5e2b6..f0bb73f 100644
--- a/be/src/exec/incr-stats-util.cc
+++ b/be/src/exec/incr-stats-util.cc
@@ -165,8 +165,7 @@ struct PerColumnStats {
   // avg_width contain valid values.
   void Finalize() {
     ndv_estimate = AggregateFunctions::HllFinalEstimate(
-        reinterpret_cast<const uint8_t*>(intermediate_ndv.data()),
-        intermediate_ndv.size());
+        reinterpret_cast<const uint8_t*>(intermediate_ndv.data()));
     avg_width = num_rows == 0 ? 0 : avg_width / num_rows;
   }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/38a9ab78/be/src/exprs/aggregate-functions-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/aggregate-functions-ir.cc b/be/src/exprs/aggregate-functions-ir.cc
index a8fbe57..9af563e 100644
--- a/be/src/exprs/aggregate-functions-ir.cc
+++ b/be/src/exprs/aggregate-functions-ir.cc
@@ -1467,10 +1467,8 @@ void AggregateFunctions::HllMerge(
   }
 }
 
-uint64_t AggregateFunctions::HllFinalEstimate(const uint8_t* buckets,
-    int32_t num_buckets) {
+uint64_t AggregateFunctions::HllFinalEstimate(const uint8_t* buckets) {
   DCHECK(buckets != NULL);
-  DCHECK_EQ(num_buckets, HLL_LEN);
 
   // Empirical constants for the algorithm.
   float alpha = 0;
@@ -1486,9 +1484,8 @@ uint64_t AggregateFunctions::HllFinalEstimate(const uint8_t* buckets,
 
   float harmonic_mean = 0;
   int num_zero_registers = 0;
-  // TODO: Consider improving this loop (e.g. replacing 'if' with arithmetic op).
-  for (int i = 0; i < num_buckets; ++i) {
-    harmonic_mean += powf(2.0f, -buckets[i]);
+  for (int i = 0; i < HLL_LEN; ++i) {
+    harmonic_mean += ldexp(1.0f, -buckets[i]);
     if (buckets[i] == 0) ++num_zero_registers;
   }
   harmonic_mean = 1.0f / harmonic_mean;
@@ -1509,7 +1506,7 @@ uint64_t AggregateFunctions::HllFinalEstimate(const uint8_t* buckets,
 
 BigIntVal AggregateFunctions::HllFinalize(FunctionContext* ctx, const StringVal& src) {
   if (UNLIKELY(src.is_null)) return BigIntVal::null();
-  uint64_t estimate = HllFinalEstimate(src.ptr, src.len);
+  uint64_t estimate = HllFinalEstimate(src.ptr);
   return estimate;
 }
 
@@ -1620,7 +1617,7 @@ BigIntVal AggregateFunctions::SampledNdvFinalize(FunctionContext* ctx,
       counts[pidx] = merged_count;
       StringVal hll = StringVal(state->buckets[bucket_idx].hll, HLL_LEN);
       HllMerge(ctx, hll, &merged_hll);
-      ndvs[pidx] = HllFinalEstimate(merged_hll.ptr, HLL_LEN);
+      ndvs[pidx] = HllFinalEstimate(merged_hll.ptr);
       ++pidx;
     }
     min_count = std::min(min_count, state->buckets[i].row_count);

http://git-wip-us.apache.org/repos/asf/impala/blob/38a9ab78/be/src/exprs/aggregate-functions.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/aggregate-functions.h b/be/src/exprs/aggregate-functions.h
index 81884c1..243e9f7 100644
--- a/be/src/exprs/aggregate-functions.h
+++ b/be/src/exprs/aggregate-functions.h
@@ -200,9 +200,9 @@ class AggregateFunctions {
   static void HllMerge(FunctionContext*, const StringVal& src, StringVal* dst);
   static BigIntVal HllFinalize(FunctionContext*, const StringVal& src);
 
-  /// Utility method to compute the final result of an HLL estimation from num_buckets
-  /// estimates.
-  static uint64_t HllFinalEstimate(const uint8_t* buckets, int32_t num_buckets);
+  /// Utility method to compute the final result of an HLL estimation.
+  /// Assumes HLL_LEN number of buckets.
+  static uint64_t HllFinalEstimate(const uint8_t* buckets);
 
   /// Estimates the number of distinct values (NDV) based on a sample of data and the
   /// corresponding sampling rate. The main idea of this function is to collect several


[2/5] impala git commit: IMPALA-6427: fix QUERYOPTIONS in planner test output

Posted by ph...@apache.org.
IMPALA-6427: fix QUERYOPTIONS in planner test output

Testing:
Ran planner tests. Confirmed that the generated union.test
included the QUERYOPTIONS section and that test files without the
QUERYOPTIONS section did not have QUERYOPTIONS sections added.

Change-Id: I2971588f977f72b0f869370803b089d56303d91a
Reviewed-on: http://gerrit.cloudera.org:8080/9081
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e376d333
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e376d333
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e376d333

Branch: refs/heads/2.x
Commit: e376d333c9f1a78fddbe9c5c2de718fb9d8bf0a6
Parents: 8f0ef09
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Fri Jan 19 12:22:18 2018 -0800
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Mon Jan 22 16:41:16 2018 -0800

----------------------------------------------------------------------
 .../test/java/org/apache/impala/planner/PlannerTestBase.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e376d333/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index ab538ff..bc93160 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -19,7 +19,6 @@ package org.apache.impala.planner;
 
 import static org.junit.Assert.fail;
 
-import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.nio.file.Paths;
@@ -745,6 +744,13 @@ public class PlannerTestBase extends FrontendTestBase {
     for (TestCase testCase : queryFileParser.getTestCases()) {
       actualOutput.append(testCase.getSectionAsString(Section.QUERY, true, "\n"));
       actualOutput.append("\n");
+      String queryOptionsSection = testCase.getSectionAsString(
+          Section.QUERYOPTIONS, true, "\n");
+      if (queryOptionsSection != null && !queryOptionsSection.isEmpty()) {
+        actualOutput.append("---- QUERYOPTIONS\n");
+        actualOutput.append(queryOptionsSection);
+        actualOutput.append("\n");
+      }
       try {
         runTestCase(testCase, errorLog, actualOutput, dbName, ignoreExplainHeader);
       } catch (CatalogException e) {


[4/5] impala git commit: IMPALA-6092: run flaky test serially (temporary).

Posted by ph...@apache.org.
IMPALA-6092: run flaky test serially (temporary).

The reason for the flake is described in IMPALA-6215.
This patch runs the test serially to help reduce the
recent increase of flakes on gvo.

Change-Id: I4a4165f0e82f270df2c8a3af087a9a6ec63fd086
Reviewed-on: http://gerrit.cloudera.org:8080/9080
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/b008b77f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/b008b77f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/b008b77f

Branch: refs/heads/2.x
Commit: b008b77fe1011e92023f063666b488a2b572580f
Parents: e376d33
Author: Vuk Ercegovac <ve...@cloudera.com>
Authored: Fri Jan 19 12:30:54 2018 -0800
Committer: Philip Zeyliger <ph...@cloudera.com>
Committed: Mon Jan 22 16:41:16 2018 -0800

----------------------------------------------------------------------
 tests/query_test/test_udfs.py | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/b008b77f/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index a9d0974..1ff716a 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -316,6 +316,8 @@ class TestUdfExecution(TestUdfBase):
       self.run_test_case('QueryTest/udf-non-deterministic', vector,
           use_db=unique_database)
 
+  # Runs serially as a temporary workaround for IMPALA_6092.
+  @pytest.mark.execute_serially
   def test_java_udfs(self, vector, unique_database):
     self.run_test_case('QueryTest/load-java-udfs', vector, use_db=unique_database)
     self.run_test_case('QueryTest/java-udf', vector, use_db=unique_database)