You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2019/07/31 12:57:40 UTC
[impala] branch master updated: IMPALA-8808: Add query option
default_transactional_type
This is an automated email from the ASF dual-hosted git repository.
csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new b587751 IMPALA-8808: Add query option default_transactional_type
b587751 is described below
commit b587751e72c6d1ac1836edbae7d82e9110ca6574
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Tue Jul 30 16:28:43 2019 +0200
IMPALA-8808: Add query option default_transactional_type
Add a query option that allows insert-only ACID tables to be created
by default.
default_transactional_type's possible values:
- NONE (default)
- INSERT_ONLY
If either of the properties "transactional" or "transactional_properties" is
defined, then default_transactional_type is ignored.
default_transactional_type does not affect external or Kudu tables, as
these cannot be transactional.
Possible TODO: the value "INSERT_ONLY" could be treated as an error in a
Hive 2 environment.
Change-Id: I2baaebaf79b9e983cf75c6c6879eacd88b1de547
Reviewed-on: http://gerrit.cloudera.org:8080/13954
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <bo...@cloudera.com>
---
be/src/service/query-options.cc | 7 +++
be/src/service/query-options.h | 6 ++-
be/src/util/debug-util.cc | 1 +
be/src/util/debug-util.h | 1 +
common/thrift/ImpalaInternalService.thrift | 9 ++++
common/thrift/ImpalaService.thrift | 4 ++
.../java/org/apache/impala/analysis/TableDef.java | 29 +++++++++++-
.../java/org/apache/impala/util/AcidUtils.java | 22 ++++++++++
.../functional-query/queries/QueryTest/set.test | 5 +++
tests/metadata/test_ddl.py | 51 ++++++++++++++++++++++
10 files changed, 131 insertions(+), 4 deletions(-)
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index b4eaf20..a84dd49 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -808,6 +808,13 @@ Status impala::SetQueryOption(const string& key, const string& value,
query_options->__set_spool_query_results(IsTrue(value));
break;
}
+ case TImpalaQueryOptions::DEFAULT_TRANSACTIONAL_TYPE: {
+ TTransactionalType::type enum_type;
+ RETURN_IF_ERROR(GetThriftEnum(value, "default transactional type",
+ _TTransactionalType_VALUES_TO_NAMES, &enum_type));
+ query_options->__set_default_transactional_type(enum_type);
+ break;
+ }
default:
if (IsRemovedQueryOption(key)) {
LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 535d5ba..6bb7153 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -47,7 +47,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
#define QUERY_OPTS_TABLE\
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\
- TImpalaQueryOptions::SPOOL_QUERY_RESULTS + 1);\
+ TImpalaQueryOptions::DEFAULT_TRANSACTIONAL_TYPE + 1);\
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\
@@ -172,7 +172,9 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
QUERY_OPT_FN(default_hints_insert_statement, DEFAULT_HINTS_INSERT_STATEMENT,\
TQueryOptionLevel::REGULAR)\
QUERY_OPT_FN(spool_query_results, SPOOL_QUERY_RESULTS,\
- TQueryOptionLevel::DEVELOPMENT)
+ TQueryOptionLevel::DEVELOPMENT)\
+ QUERY_OPT_FN(default_transactional_type, DEFAULT_TRANSACTIONAL_TYPE,\
+ TQueryOptionLevel::ADVANCED)
;
/// Enforce practical limits on some query options to avoid undesired query state.
diff --git a/be/src/util/debug-util.cc b/be/src/util/debug-util.cc
index 7029a71..1bad6fc 100644
--- a/be/src/util/debug-util.cc
+++ b/be/src/util/debug-util.cc
@@ -86,6 +86,7 @@ PRINT_THRIFT_ENUM_IMPL(TSessionType)
PRINT_THRIFT_ENUM_IMPL(TStmtType)
PRINT_THRIFT_ENUM_IMPL(TUnit)
PRINT_THRIFT_ENUM_IMPL(TParquetTimestampType)
+PRINT_THRIFT_ENUM_IMPL(TTransactionalType)
string PrintId(const TUniqueId& id, const string& separator) {
stringstream out;
diff --git a/be/src/util/debug-util.h b/be/src/util/debug-util.h
index b2235ac..f88495b 100644
--- a/be/src/util/debug-util.h
+++ b/be/src/util/debug-util.h
@@ -70,6 +70,7 @@ std::string PrintThriftEnum(const TSessionType::type& value);
std::string PrintThriftEnum(const TStmtType::type& value);
std::string PrintThriftEnum(const TUnit::type& value);
std::string PrintThriftEnum(const TParquetTimestampType::type& value);
+std::string PrintThriftEnum(const TTransactionalType::type& value);
std::string PrintTuple(const Tuple* t, const TupleDescriptor& d);
std::string PrintRow(TupleRow* row, const RowDescriptor& d);
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 2cc2f3e..3e12895 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -75,6 +75,12 @@ enum TParquetTimestampType {
INT64_NANOS
}
+// A table's Hive ACID type.
+enum TTransactionalType {
+ NONE,
+ INSERT_ONLY
+}
+
// Query options that correspond to ImpalaService.ImpalaQueryOptions, with their
// respective defaults. Query options can be set in the following ways:
//
@@ -358,6 +364,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift
86: optional bool spool_query_results = false;
+
+ // See comment in ImpalaService.thrift
+ 87: optional TTransactionalType default_transactional_type = TTransactionalType.NONE;
}
// Impala currently has two types of sessions: Beeswax and HiveServer2
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index f24124f..8d534f0 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -411,6 +411,10 @@ enum TImpalaQueryOptions {
// up more memory. If false, client consumption driven backpressure controls the rate
// at which rows are materialized by the execution tree.
SPOOL_QUERY_RESULTS = 85
+
+ // Specifies the default transactional type for new HDFS tables.
+ // Valid values: none, insert_only
+ DEFAULT_TRANSACTIONAL_TYPE = 86
}
// The summary of a DML statement.
diff --git a/fe/src/main/java/org/apache/impala/analysis/TableDef.java b/fe/src/main/java/org/apache/impala/analysis/TableDef.java
index b37864a..f9f57c5 100644
--- a/fe/src/main/java/org/apache/impala/analysis/TableDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/TableDef.java
@@ -38,6 +38,7 @@ import org.apache.impala.thrift.TAccessEvent;
import org.apache.impala.thrift.TCatalogObjectType;
import org.apache.impala.thrift.THdfsFileFormat;
import org.apache.impala.thrift.TQueryOptions;
+import org.apache.impala.util.AcidUtils;
import org.apache.impala.util.MetaStoreUtil;
import com.google.common.base.Preconditions;
@@ -222,8 +223,7 @@ class TableDef {
Preconditions.checkState(tableName_ != null && !tableName_.isEmpty());
fqTableName_ = analyzer.getFqTableName(getTblName());
fqTableName_.analyze();
- // Disallow creation of full ACID table.
- analyzer.ensureTableNotFullAcid(options_.tblProperties, fqTableName_.toString());
+ analyzeAcidProperties(analyzer);
analyzeColumnDefs(analyzer);
analyzePrimaryKeys();
@@ -441,4 +441,29 @@ class TableDef {
}
return byteVal;
}
+
+ /**
+ * Analyzes Hive ACID related properties.
+ * Can change table properties based on query options.
+ */
+ private void analyzeAcidProperties(Analyzer analyzer) throws AnalysisException {
+ if (isExternal_) {
+ if (AcidUtils.isTransactionalTable(options_.tblProperties)) {
+ throw new AnalysisException("EXTERNAL tables cannot be transactional");
+ }
+ return;
+ }
+
+ if (options_.fileFormat == THdfsFileFormat.KUDU) {
+ if (AcidUtils.isTransactionalTable(options_.tblProperties)) {
+ throw new AnalysisException("Kudu tables cannot be transactional");
+ }
+ return;
+ }
+
+ AcidUtils.setTransactionalProperties(options_.tblProperties,
+ analyzer.getQueryOptions().getDefault_transactional_type());
+ // Disallow creation of full ACID table.
+ analyzer.ensureTableNotFullAcid(options_.tblProperties, fqTableName_.toString());
+ }
}
diff --git a/fe/src/main/java/org/apache/impala/util/AcidUtils.java b/fe/src/main/java/org/apache/impala/util/AcidUtils.java
index 5642533..4f4ee7a 100644
--- a/fe/src/main/java/org/apache/impala/util/AcidUtils.java
+++ b/fe/src/main/java/org/apache/impala/util/AcidUtils.java
@@ -26,6 +26,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.impala.catalog.FileMetadataLoader.LoadStats;
import org.apache.impala.common.FileSystemUtil;
+import org.apache.impala.thrift.TQueryOptions;
+import org.apache.impala.thrift.TTransactionalType;
import java.util.ArrayList;
import java.util.Iterator;
@@ -101,6 +103,26 @@ public class AcidUtils {
return isTransactionalTable(props) && !isInsertOnlyTable(props);
}
+ // Sets transaction related table properties for new tables based on manually
+ // set table properties and default transactional type.
+ public static void setTransactionalProperties(Map<String, String> props,
+ TTransactionalType defaultTransactionalType) {
+ Preconditions.checkNotNull(props);
+ if (props.get(TABLE_IS_TRANSACTIONAL) != null
+ || props.get(TABLE_TRANSACTIONAL_PROPERTIES) != null) {
+ // Table properties are set manually, ignore default.
+ return;
+ }
+
+ switch (defaultTransactionalType) {
+ case NONE: break;
+ case INSERT_ONLY:
+ props.put(TABLE_IS_TRANSACTIONAL, "true");
+ props.put(TABLE_TRANSACTIONAL_PROPERTIES, INSERTONLY_TRANSACTIONAL_PROPERTY);
+ break;
+ }
+ }
+
/**
* Predicate that checks if the file or directory is relevant for a given WriteId list.
* <p>
diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test
index 1a5f633..f6c1a63 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/set.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/set.test
@@ -145,6 +145,11 @@ set default_file_format=bar
Invalid default file format: 'bar'. Valid values are TEXT(0), RC_FILE(1), SEQUENCE_FILE(2), AVRO(3), PARQUET(4), KUDU(5), ORC(6).
====
---- QUERY
+set default_transactional_type=bar
+---- CATCH
+Invalid default transactional type: 'bar'. Valid values are NONE(0), INSERT_ONLY(1).
+====
+---- QUERY
# Test that SET actually does change the mem_limit.
# First, show mem_limit is not hit.
select 1
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index 41502cc..3a3c982 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -780,6 +780,57 @@ class TestDdlStatements(TestDdlBase):
self.client, "show create table {0}".format(orc_table))
assert any("ORC" in x for x in result.data)
+ @SkipIfHive2.acid
+ def test_create_table_transactional_type(self, vector, unique_database):
+ # When default_transactional_type query option is not specified, the transaction
+ # related table properties are not set.
+ non_acid_table = "{0}.non_acid_tbl".format(unique_database)
+ self.execute_query_expect_success(
+ self.client, "create table {0}(i int)".format(non_acid_table),
+ {"default_transactional_type": "none"})
+ props = self._get_properties("Table Parameters", non_acid_table)
+ assert "transactional" not in props
+ assert "transactional_properties" not in props
+
+ # Create table as "insert_only" transactional.
+ insert_only_acid_table = "{0}.insert_only_acid_tbl".format(unique_database)
+ self.execute_query_expect_success(
+ self.client, "create table {0}(i int)".format(insert_only_acid_table),
+ {"default_transactional_type": "insert_only"})
+ props = self._get_properties("Table Parameters", insert_only_acid_table)
+ assert props["transactional"] == "true"
+ assert props["transactional_properties"] == "insert_only"
+
+ # default_transactional_type query option should not affect external tables
+ external_table = "{0}.external_tbl".format(unique_database)
+ self.execute_query_expect_success(
+ self.client, "create external table {0}(i int)".format(external_table),
+ {"default_transactional_type": "insert_only"})
+ props = self._get_properties("Table Parameters", external_table)
+ assert "transactional" not in props
+ assert "transactional_properties" not in props
+
+ # default_transactional_type query option should not affect Kudu tables.
+ kudu_table = "{0}.kudu_tbl".format(unique_database)
+ self.execute_query_expect_success(
+ self.client,
+ "create table {0}(i int primary key) stored as kudu".format(kudu_table),
+ {"default_transactional_type": "insert_only"})
+ props = self._get_properties("Table Parameters", kudu_table)
+ assert "transactional" not in props
+ assert "transactional_properties" not in props
+
+ # default_transactional_type query option should have no effect when transactional
+ # table properties are set manually.
+ manual_acid_table = "{0}.manual_acid_tbl".format(unique_database)
+ self.execute_query_expect_success(
+ self.client, "create table {0}(i int) TBLPROPERTIES ('transactional'='false')"
+ .format(manual_acid_table),
+ {"default_transactional_type": "insert_only"})
+ props = self._get_properties("Table Parameters", manual_acid_table)
+ assert "transactional" not in props
+ assert "transactional_properties" not in props
+
def test_kudu_column_comment(self, vector, unique_database):
table = "{0}.kudu_table0".format(unique_database)
self.client.execute("create table {0}(x int comment 'x' primary key) \