You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2019/07/31 12:57:40 UTC

[impala] branch master updated: IMPALA-8808: Add query option default_transactional_type

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new b587751  IMPALA-8808: Add query option default_transactional_type
b587751 is described below

commit b587751e72c6d1ac1836edbae7d82e9110ca6574
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Tue Jul 30 16:28:43 2019 +0200

    IMPALA-8808: Add query option default_transactional_type
    
    Add a query option that allows creating insert-only ACID tables
    by default.
    default_transactional_type's possible values:
    - NONE (default)
    - INSERT_ONLY
    
    If either of the properties "transactional" or "transactional_properties" is
    defined, then default_transactional_type is ignored.
    
    default_transactional_type does not affect external or Kudu tables, as
    these cannot be transactional.
    
    Possible TODO: value "INSERT_ONLY" could be treated as an error in a Hive 2
    environment.
    
    Change-Id: I2baaebaf79b9e983cf75c6c6879eacd88b1de547
    Reviewed-on: http://gerrit.cloudera.org:8080/13954
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
---
 be/src/service/query-options.cc                    |  7 +++
 be/src/service/query-options.h                     |  6 ++-
 be/src/util/debug-util.cc                          |  1 +
 be/src/util/debug-util.h                           |  1 +
 common/thrift/ImpalaInternalService.thrift         |  9 ++++
 common/thrift/ImpalaService.thrift                 |  4 ++
 .../java/org/apache/impala/analysis/TableDef.java  | 29 +++++++++++-
 .../java/org/apache/impala/util/AcidUtils.java     | 22 ++++++++++
 .../functional-query/queries/QueryTest/set.test    |  5 +++
 tests/metadata/test_ddl.py                         | 51 ++++++++++++++++++++++
 10 files changed, 131 insertions(+), 4 deletions(-)

diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index b4eaf20..a84dd49 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -808,6 +808,13 @@ Status impala::SetQueryOption(const string& key, const string& value,
         query_options->__set_spool_query_results(IsTrue(value));
         break;
       }
+      case TImpalaQueryOptions::DEFAULT_TRANSACTIONAL_TYPE: {
+        TTransactionalType::type enum_type;
+        RETURN_IF_ERROR(GetThriftEnum(value, "default transactional type",
+            _TTransactionalType_VALUES_TO_NAMES, &enum_type));
+        query_options->__set_default_transactional_type(enum_type);
+        break;
+      }
       default:
         if (IsRemovedQueryOption(key)) {
           LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 535d5ba..6bb7153 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -47,7 +47,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
 // time we add or remove a query option to/from the enum TImpalaQueryOptions.
 #define QUERY_OPTS_TABLE\
   DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
-      TImpalaQueryOptions::SPOOL_QUERY_RESULTS + 1);\
+      TImpalaQueryOptions::DEFAULT_TRANSACTIONAL_TYPE + 1);\
   REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
   QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
   REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -172,7 +172,9 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
   QUERY_OPT_FN(default_hints_insert_statement, DEFAULT_HINTS_INSERT_STATEMENT,\
       TQueryOptionLevel::REGULAR)\
   QUERY_OPT_FN(spool_query_results, SPOOL_QUERY_RESULTS,\
-      TQueryOptionLevel::DEVELOPMENT)
+      TQueryOptionLevel::DEVELOPMENT)\
+  QUERY_OPT_FN(default_transactional_type, DEFAULT_TRANSACTIONAL_TYPE,\
+      TQueryOptionLevel::ADVANCED)
   ;
 
 /// Enforce practical limits on some query options to avoid undesired query state.
diff --git a/be/src/util/debug-util.cc b/be/src/util/debug-util.cc
index 7029a71..1bad6fc 100644
--- a/be/src/util/debug-util.cc
+++ b/be/src/util/debug-util.cc
@@ -86,6 +86,7 @@ PRINT_THRIFT_ENUM_IMPL(TSessionType)
 PRINT_THRIFT_ENUM_IMPL(TStmtType)
 PRINT_THRIFT_ENUM_IMPL(TUnit)
 PRINT_THRIFT_ENUM_IMPL(TParquetTimestampType)
+PRINT_THRIFT_ENUM_IMPL(TTransactionalType)
 
 string PrintId(const TUniqueId& id, const string& separator) {
   stringstream out;
diff --git a/be/src/util/debug-util.h b/be/src/util/debug-util.h
index b2235ac..f88495b 100644
--- a/be/src/util/debug-util.h
+++ b/be/src/util/debug-util.h
@@ -70,6 +70,7 @@ std::string PrintThriftEnum(const TSessionType::type& value);
 std::string PrintThriftEnum(const TStmtType::type& value);
 std::string PrintThriftEnum(const TUnit::type& value);
 std::string PrintThriftEnum(const TParquetTimestampType::type& value);
+std::string PrintThriftEnum(const TTransactionalType::type& value);
 
 std::string PrintTuple(const Tuple* t, const TupleDescriptor& d);
 std::string PrintRow(TupleRow* row, const RowDescriptor& d);
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 2cc2f3e..3e12895 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -75,6 +75,12 @@ enum TParquetTimestampType {
   INT64_NANOS
 }
 
+// A table's Hive ACID type. Value type of query option default_transactional_type.
+enum TTransactionalType {
+  NONE,  // No transactional table properties are added by default.
+  INSERT_ONLY  // New tables are created as insert-only ACID tables by default.
+}
+
 // Query options that correspond to ImpalaService.ImpalaQueryOptions, with their
 // respective defaults. Query options can be set in the following ways:
 //
@@ -358,6 +364,9 @@ struct TQueryOptions {
 
   // See comment in ImpalaService.thrift
   86: optional bool spool_query_results = false;
+
+  // See comment in ImpalaService.thrift
+  87: optional TTransactionalType default_transactional_type = TTransactionalType.NONE;
 }
 
 // Impala currently has two types of sessions: Beeswax and HiveServer2
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index f24124f..8d534f0 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -411,6 +411,10 @@ enum TImpalaQueryOptions {
   // up more memory. If false, client consumption driven backpressure controls the rate
   // at which rows are materialized by the execution tree.
   SPOOL_QUERY_RESULTS = 85
+
+  // Specifies the default transactional type for new HDFS tables.
+  // Valid values: none, insert_only
+  DEFAULT_TRANSACTIONAL_TYPE = 86
 }
 
 // The summary of a DML statement.
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDef.java b/fe/src/main/java/org/apache/impala/analysis/TableDef.java
index b37864a..f9f57c5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableDef.java
@@ -38,6 +38,7 @@ import org.apache.impala.thrift.TAccessEvent;
 import org.apache.impala.thrift.TCatalogObjectType;
 import org.apache.impala.thrift.THdfsFileFormat;
 import org.apache.impala.thrift.TQueryOptions;
+import org.apache.impala.util.AcidUtils;
 import org.apache.impala.util.MetaStoreUtil;
 
 import com.google.common.base.Preconditions;
@@ -222,8 +223,7 @@ class TableDef {
     Preconditions.checkState(tableName_ != null && !tableName_.isEmpty());
     fqTableName_ = analyzer.getFqTableName(getTblName());
     fqTableName_.analyze();
-    // Disallow creation of full ACID table.
-    analyzer.ensureTableNotFullAcid(options_.tblProperties, fqTableName_.toString());
+    analyzeAcidProperties(analyzer);
     analyzeColumnDefs(analyzer);
     analyzePrimaryKeys();
 
@@ -441,4 +441,29 @@ class TableDef {
     }
     return byteVal;
   }
+
+  /**
+   * Analyzes Hive ACID related properties: rejects transactional EXTERNAL/Kudu tables,
+   * otherwise may add transactional table properties based on query options.
+   */
+  private void analyzeAcidProperties(Analyzer analyzer) throws AnalysisException {
+    if (isExternal_) {
+      if (AcidUtils.isTransactionalTable(options_.tblProperties)) {
+        throw new AnalysisException("EXTERNAL tables cannot be transactional");
+      }
+      return;  // default_transactional_type is not applied to EXTERNAL tables.
+    }
+
+    if (options_.fileFormat == THdfsFileFormat.KUDU) {
+      if (AcidUtils.isTransactionalTable(options_.tblProperties)) {
+        throw new AnalysisException("Kudu tables cannot be transactional");
+      }
+      return;  // default_transactional_type is not applied to Kudu tables.
+    }
+
+    AcidUtils.setTransactionalProperties(options_.tblProperties,
+          analyzer.getQueryOptions().getDefault_transactional_type());
+    // Disallow creation of full ACID table.
+    analyzer.ensureTableNotFullAcid(options_.tblProperties, fqTableName_.toString());
+  }
 }
diff --git a/fe/src/main/java/org/apache/impala/util/AcidUtils.java b/fe/src/main/java/org/apache/impala/util/AcidUtils.java
index 5642533..4f4ee7a 100644
--- a/fe/src/main/java/org/apache/impala/util/AcidUtils.java
+++ b/fe/src/main/java/org/apache/impala/util/AcidUtils.java
@@ -26,6 +26,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.impala.catalog.FileMetadataLoader.LoadStats;
 import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.thrift.TQueryOptions;
+import org.apache.impala.thrift.TTransactionalType;
 
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -101,6 +103,26 @@ public class AcidUtils {
     return isTransactionalTable(props) && !isInsertOnlyTable(props);
   }
 
+  // Adds the insert-only transactional properties to 'props' if the default type is
+  // INSERT_ONLY and neither transactional property already has a value in 'props'.
+  public static void setTransactionalProperties(Map<String, String> props,
+      TTransactionalType defaultTransactionalType) {
+    Preconditions.checkNotNull(props);
+    if (props.get(TABLE_IS_TRANSACTIONAL) != null
+        || props.get(TABLE_TRANSACTIONAL_PROPERTIES) != null) {
+      // Table properties are set manually, ignore default.
+      return;
+    }
+
+    switch (defaultTransactionalType) {
+      case NONE: break;  // Leave 'props' unchanged.
+      case INSERT_ONLY:
+        props.put(TABLE_IS_TRANSACTIONAL, "true");
+        props.put(TABLE_TRANSACTIONAL_PROPERTIES, INSERTONLY_TRANSACTIONAL_PROPERTY);
+        break;
+    }
+  }
+
   /**
    * Predicate that checks if the file or directory is relevant for a given WriteId list.
    * <p>
diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test
index 1a5f633..f6c1a63 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/set.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/set.test
@@ -145,6 +145,11 @@ set default_file_format=bar
 Invalid default file format: 'bar'. Valid values are TEXT(0), RC_FILE(1), SEQUENCE_FILE(2), AVRO(3), PARQUET(4), KUDU(5), ORC(6).
 ====
 ---- QUERY
+set default_transactional_type=bar
+---- CATCH
+Invalid default transactional type: 'bar'. Valid values are NONE(0), INSERT_ONLY(1).
+====
+---- QUERY
 # Test that SET actually does change the mem_limit.
 # First, show mem_limit is not hit.
 select 1
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 41502cc..3a3c982 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -780,6 +780,57 @@ class TestDdlStatements(TestDdlBase):
       self.client, "show create table {0}".format(orc_table))
     assert any("ORC" in x for x in result.data)
 
+  @SkipIfHive2.acid
+  def test_create_table_transactional_type(self, vector, unique_database):
+    # With default_transactional_type set to "none" (the default value), the
+    # transaction related table properties are not set.
+    non_acid_table = "{0}.non_acid_tbl".format(unique_database)
+    self.execute_query_expect_success(
+        self.client, "create table {0}(i int)".format(non_acid_table),
+        {"default_transactional_type": "none"})
+    props = self._get_properties("Table Parameters", non_acid_table)
+    assert "transactional" not in props
+    assert "transactional_properties" not in props
+
+    # Create table as "insert_only" transactional.
+    insert_only_acid_table = "{0}.insert_only_acid_tbl".format(unique_database)
+    self.execute_query_expect_success(
+        self.client, "create table {0}(i int)".format(insert_only_acid_table),
+        {"default_transactional_type": "insert_only"})
+    props = self._get_properties("Table Parameters", insert_only_acid_table)
+    assert props["transactional"] == "true"
+    assert props["transactional_properties"] == "insert_only"
+
+    # default_transactional_type query option should not affect external tables
+    external_table = "{0}.external_tbl".format(unique_database)
+    self.execute_query_expect_success(
+        self.client, "create external table {0}(i int)".format(external_table),
+        {"default_transactional_type": "insert_only"})
+    props = self._get_properties("Table Parameters", external_table)
+    assert "transactional" not in props
+    assert "transactional_properties" not in props
+
+    # default_transactional_type query option should not affect Kudu tables.
+    kudu_table = "{0}.kudu_tbl".format(unique_database)
+    self.execute_query_expect_success(
+        self.client,
+        "create table {0}(i int primary key) stored as kudu".format(kudu_table),
+        {"default_transactional_type": "insert_only"})
+    props = self._get_properties("Table Parameters", kudu_table)
+    assert "transactional" not in props
+    assert "transactional_properties" not in props
+
+    # default_transactional_type query option should have no effect when transactional
+    # table properties are set manually.
+    manual_acid_table = "{0}.manual_acid_tbl".format(unique_database)
+    self.execute_query_expect_success(
+        self.client, "create table {0}(i int) TBLPROPERTIES ('transactional'='false')"
+            .format(manual_acid_table),
+        {"default_transactional_type": "insert_only"})
+    props = self._get_properties("Table Parameters", manual_acid_table)
+    assert "transactional" not in props
+    assert "transactional_properties" not in props
+
   def test_kudu_column_comment(self, vector, unique_database):
     table = "{0}.kudu_table0".format(unique_database)
     self.client.execute("create table {0}(x int comment 'x' primary key) \