You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2022/09/10 05:07:29 UTC
[impala] branch master updated: IMPALA-9499: Display support for all complex types in a SELECT * query
This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 04b5319e6 IMPALA-9499: Display support for all complex types in a SELECT * query
04b5319e6 is described below
commit 04b5319e6e8417226c28f4fe8386e86ce488a4c2
Author: Peter Rozsa <pr...@cloudera.com>
AuthorDate: Thu Aug 11 09:24:09 2022 +0200
IMPALA-9499: Display support for all complex types in a SELECT * query
This change adds EXPAND_COMPLEX_TYPES query option to support the
display of complex types in SELECT statements where star (*) expression
is in the select list. By default, the query option is disabled. When
it's enabled, it changes the behaviour of star expansion to list all
top-level column fields including ones with complex types, instead of
listing the scalar column fields only. Nested complex type expansion is
also supported, eg.: struct.* will enumerate the members of the struct.
Array, map and struct types are supported.
Testing:
- Analyzer tests check select statements when the query option is
enabled or disabled.
- EE tests check the proper complex type deserialization when the query
option is enabled, and the original behaviour when the option is
disabled.
Change-Id: I84b5e5703f9e0ce0f4f8bff83941677dd7489974
Reviewed-on: http://gerrit.cloudera.org:8080/18863
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/service/query-options.cc | 4 +
be/src/service/query-options.h | 5 +-
common/thrift/ImpalaService.thrift | 4 +
common/thrift/Query.thrift | 3 +
.../org/apache/impala/analysis/InlineViewRef.java | 6 +-
.../org/apache/impala/analysis/SelectStmt.java | 15 +-
.../org/apache/impala/catalog/StructField.java | 6 +
.../apache/impala/analysis/AnalyzeStmtsTest.java | 28 +++-
.../QueryTest/nested-types-star-expansion.test | 164 +++++++++++++++++++++
tests/query_test/test_nested_types.py | 31 ++++
10 files changed, 255 insertions(+), 11 deletions(-)
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 7942787db..d8245a8c2 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -1229,6 +1229,10 @@ Status impala::SetQueryOption(const string& key, const string& value,
query_options->__set_orc_schema_resolution(enum_type);
break;
}
+ case TImpalaQueryOptions::EXPAND_COMPLEX_TYPES: {
+ query_options->__set_expand_complex_types(IsTrue(value));
+ break;
+ }
default:
if (IsRemovedQueryOption(key)) {
LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'";
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index b459f8b81..75e4c83ea 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -50,7 +50,7 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
// time we add or remove a query option to/from the enum TImpalaQueryOptions.
#define QUERY_OPTS_TABLE \
DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(), \
- TImpalaQueryOptions::ORC_SCHEMA_RESOLUTION + 1); \
+ TImpalaQueryOptions::EXPAND_COMPLEX_TYPES + 1); \
REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED) \
QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR) \
REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS) \
@@ -274,7 +274,8 @@ typedef std::unordered_map<string, beeswax::TQueryOptionLevel::type>
QUERY_OPT_FN(enable_replan, ENABLE_REPLAN, TQueryOptionLevel::ADVANCED) \
QUERY_OPT_FN(test_replan, TEST_REPLAN, TQueryOptionLevel::ADVANCED) \
QUERY_OPT_FN(lock_max_wait_time_s, LOCK_MAX_WAIT_TIME_S, TQueryOptionLevel::REGULAR) \
- QUERY_OPT_FN(orc_schema_resolution, ORC_SCHEMA_RESOLUTION, TQueryOptionLevel::REGULAR);
+ QUERY_OPT_FN(orc_schema_resolution, ORC_SCHEMA_RESOLUTION, TQueryOptionLevel::REGULAR) \
+ QUERY_OPT_FN(expand_complex_types, EXPAND_COMPLEX_TYPES, TQueryOptionLevel::REGULAR);
/// Enforce practical limits on some query options to avoid undesired query state.
static const int64_t SPILLABLE_BUFFER_LIMIT = 1LL << 40; // 1 TB
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 60844d6c6..715b2df64 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -736,6 +736,10 @@ enum TImpalaQueryOptions {
// Determines how to resolve ORC files' schemas. Valid values are "position" and "name".
ORC_SCHEMA_RESOLUTION = 146;
+
+ // Expands complex types in star queries
+ EXPAND_COMPLEX_TYPES = 147
+
}
// The summary of a DML statement.
diff --git a/common/thrift/Query.thrift b/common/thrift/Query.thrift
index 9cd41caa6..806f2dcd3 100644
--- a/common/thrift/Query.thrift
+++ b/common/thrift/Query.thrift
@@ -594,6 +594,9 @@ struct TQueryOptions {
// See comment in ImpalaService.thrift
147: optional TSchemaResolutionStrategy orc_schema_resolution = 0;
+
+ // See comment in ImpalaService.thrift
+ 148: optional bool expand_complex_types = false;
}
// Impala currently has three types of sessions: Beeswax, HiveServer2 and external
diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
index 944348e5c..7a233eb5f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java
@@ -455,7 +455,9 @@ public class InlineViewRef extends TableRef {
int numColLabels = getColLabels().size();
Preconditions.checkState(numColLabels > 0);
Set<String> uniqueColAliases = Sets.newHashSetWithExpectedSize(numColLabels);
- List<StructField> fields = Lists.newArrayListWithCapacity(numColLabels);
+ // Using linked set to preserve order and uniqueness of fields. If using a
+ // list, star-expanded complex columns would be enumerated multiple times.
+ Set<StructField> fields = Sets.newLinkedHashSetWithExpectedSize(numColLabels);
for (int i = 0; i < numColLabels; ++i) {
// inline view select statement has been analyzed. Col label should be filled.
Expr selectItemExpr = queryStmt_.getResultExprs().get(i);
@@ -512,7 +514,7 @@ public class InlineViewRef extends TableRef {
result.setMaskedTable(baseTbl);
}
}
- result.setType(new StructType(fields));
+ result.setType(new StructType(Lists.newArrayList(fields)));
return result;
}
diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
index 8774fbb39..2ca45c8b9 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java
@@ -793,24 +793,27 @@ public class SelectStmt extends QueryStmt {
/**
* Helper function used during star expansion to add a single result expr
* based on a given raw path to be resolved relative to an existing path.
- * Ignores paths with a complex-typed destination because they are currently
- * illegal in any select list (even for inline views, etc.)
*/
private void addStarResultExpr(Path resolvedPath,
String... relRawPath) throws AnalysisException {
Path p = Path.createRelPath(resolvedPath, relRawPath);
Preconditions.checkState(p.resolve());
- if (p.destType().isComplexType()) return;
- SlotDescriptor slotDesc = analyzer_.registerSlotRef(p);
+ if (p.destType().isComplexType() &&
+ !analyzer_.getQueryCtx().client_request.query_options.expand_complex_types) {
+ return;
+ }
+ SlotDescriptor slotDesc = analyzer_.registerSlotRef(p, false);
SlotRef slotRef = new SlotRef(slotDesc);
Preconditions.checkState(slotRef.isAnalyzed(),
"Analysis should be done in constructor");
+
+ if(slotRef.getType().isStructType()){
+ slotRef.reExpandStruct(analyzer_);
+ }
// Empty matched types means this is expanded from star of a catalog table.
// For star of complex types, e.g. my_struct.*, my_array.*, my_map.*, the matched
// types will have the complex type so it's not empty.
if (resolvedPath.getMatchedTypes().isEmpty()) {
- Preconditions.checkState(!slotDesc.getType().isComplexType(),
- "Star expansion should only introduce scalar columns");
analyzer_.registerColumnForMasking(slotDesc);
}
resultExprs_.add(slotRef);
diff --git a/fe/src/main/java/org/apache/impala/catalog/StructField.java b/fe/src/main/java/org/apache/impala/catalog/StructField.java
index 92af9a3d2..fdfbb672f 100644
--- a/fe/src/main/java/org/apache/impala/catalog/StructField.java
+++ b/fe/src/main/java/org/apache/impala/catalog/StructField.java
@@ -23,6 +23,8 @@ import org.apache.impala.thrift.TColumnType;
import org.apache.impala.thrift.TStructField;
import org.apache.impala.thrift.TTypeNode;
+import java.util.Objects;
+
/**
* TODO: Support comments for struct fields. The Metastore does not properly store
* comments of struct fields. We set comment_ to null to avoid compatibility issues.
@@ -99,4 +101,8 @@ public class StructField {
StructField otherStructField = (StructField) other;
return otherStructField.name_.equals(name_) && otherStructField.type_.equals(type_);
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(name_, type_);
+ }
}
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 3dd0a30dd..d8b604809 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -1071,6 +1071,33 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
"Unable to INSERT into target table (default.new_tbl) because the column " +
"'tiny_struct' has a complex type 'STRUCT<b:BOOLEAN>' and Impala doesn't " +
"support inserting into tables containing complex type columns");
+
+ //Make complex types available in star queries
+ ctx.getQueryOptions().setExpand_complex_types(true);
+ //TODO: Once IMPALA-10851 is resolved it can be removed
+ ctx.getQueryOptions().setDisable_codegen(true);
+
+ AnalyzesOk("select * from functional_parquet.complextypes_structs",ctx);
+ AnalyzesOk("select * from functional_parquet.complextypes_nested_structs",ctx);
+ AnalyzesOk("select * from functional_parquet.complextypes_maps_view",ctx);
+
+ AnalyzesOk("select outer_struct.str, outer_struct.* from " +
+ "functional_parquet.complextypes_nested_structs",ctx);
+ AnalyzesOk("select * from (select * from " +
+ "functional_parquet.complextypes_nested_structs) v",ctx);
+ AnalyzesOk("select * from (select int_map, int_map_array from " +
+ "functional_parquet.complextypestbl) v",ctx);
+
+ ctx.getQueryOptions().setDisable_codegen(false);
+
+ AnalyzesOk("select * from functional_parquet.complextypes_arrays",ctx);
+ AnalyzesOk("select * from " +
+ "functional_parquet.complextypes_arrays_only_view",ctx);
+ AnalyzesOk("select v.id, v.* from " +
+ "(select * from functional_parquet.complextypes_arrays) v",ctx);
+
+ AnalysisError("select * from functional.allcomplextypes",
+ ctx,"STRUCT type inside collection types is not supported.");
}
@Test
@@ -4634,7 +4661,6 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
testNumberOfMembers(BaseTableRef.class, 0);
testNumberOfMembers(InlineViewRef.class, 10);
}
-
@SuppressWarnings("rawtypes")
private void testNumberOfMembers(Class cl, int expectedNumMembers) {
int actualNumMembers = 0;
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-star-expansion.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-star-expansion.test
new file mode 100644
index 000000000..8434252ea
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-star-expansion.test
@@ -0,0 +1,164 @@
+====
+---- QUERY
+# Expanding star on table with array type
+set EXPAND_COMPLEX_TYPES=true;
+select * from complextypes_arrays where id = 1
+---- RESULTS
+1,'[1,2,3,4,5]','["one","two","three","four","five"]'
+---- TYPES
+INT, STRING, STRING
+====
+---- QUERY
+# Expanding star on table with array type, and an additional scalar field, without EXPAND_COMPLEX_TYPES
+select id, * from complextypes_arrays where id = 2
+---- RESULTS
+2,2
+---- TYPES
+INT, INT
+====
+---- QUERY
+# Expanding star on table with array type, and an additional scalar field
+set EXPAND_COMPLEX_TYPES=true;
+select id, * from complextypes_arrays where id = 3
+---- RESULTS
+3,3,'[10,9,8]','["ten"]'
+---- TYPES
+INT, INT, STRING, STRING
+====
+---- QUERY
+# Expanding star on view with array type, and an additional scalar field
+set EXPAND_COMPLEX_TYPES=true;
+select id, * from complextypes_arrays_only_view where id = 4
+---- RESULTS
+4,4,'NULL','[]'
+---- TYPES
+BIGINT, BIGINT, STRING, STRING
+====
+---- QUERY
+# Expanding star on view with map type, and an additional scalar field
+set EXPAND_COMPLEX_TYPES=true;
+select id, * from complextypes_maps_view where id = 5
+---- RESULTS
+5,5,'{}','NULL'
+---- TYPES
+BIGINT, BIGINT, STRING, STRING
+====
+---- QUERY
+# Expanding star on views with array type, and an additional scalar field
+set EXPAND_COMPLEX_TYPES=true;
+select v1.id, * from complextypes_arrays_only_view v1, complextypes_arrays_only_view v2
+where v1.id = v2.id and v1.id = 6
+---- RESULTS
+6,6,'NULL','NULL',6,'NULL','NULL'
+---- TYPES
+BIGINT, BIGINT, STRING, STRING, BIGINT, STRING, STRING
+====
+---- QUERY
+# Expanding star on views with array type, and an additional scalar field, without EXPAND_COMPLEX_TYPES
+select v1.id, *
+from complextypes_arrays_only_view v1, complextypes_arrays_only_view v2
+where v1.id = v2.id and v1.id = 7
+---- RESULTS
+7,7,7
+---- TYPES
+BIGINT, BIGINT, BIGINT
+====
+---- QUERY
+# Expanding stars on view aliases with array type
+set EXPAND_COMPLEX_TYPES=true;
+select v1.*, v2.* from complextypes_arrays_only_view v1, complextypes_arrays_only_view v2
+where v1.id = v2.id and v1.id = 1
+---- RESULTS
+1,'[1,2,3]','[[1,2],[3,4]]',1,'[1,2,3]','[[1,2],[3,4]]'
+---- TYPES
+BIGINT, STRING, STRING, BIGINT, STRING, STRING
+====
+---- QUERY
+# Expanding star on table with struct type
+set EXPAND_COMPLEX_TYPES=true;
+select * from complextypes_structs where id = 2
+---- RESULTS
+2,'second item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab ","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+---- TYPES
+INT, STRING, STRING, STRING, STRING
+====
+---- QUERY
+# Expanding star on table with struct type, and an additional scalar field
+set EXPAND_COMPLEX_TYPES=true;
+select id, * from complextypes_structs where id = 3
+---- RESULTS
+3,3,'third item','{"ti":null,"si":null,"i":null,"bi":null,"b":null,"f":null,"do":null,"da":null,"ts":null,"s1":null,"s2":null,"c1":null,"c2":null,"vc":null,"de1":null,"de2":null}','{"b":true}','{"i":98765,"s":null}'
+---- TYPES
+INT, INT, STRING, STRING, STRING, STRING
+====
+---- QUERY
+# Expanding star on tables with struct type, and an additional scalar field
+set EXPAND_COMPLEX_TYPES=true;
+select t1.id, *
+from complextypes_structs t1, complextypes_structs t2
+where t1.id = t2.id and t1.id = 4
+---- RESULTS
+4,4,'fourth item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2024-01-01 00:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d ","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}',4,'fourth item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2024-01-01 00:00:00.123400000","s1":"random string","s2":"","c1":"c","c2 [...]
+---- TYPES
+INT, INT, STRING, STRING, STRING, STRING, INT, STRING, STRING, STRING, STRING
+====
+---- QUERY
+# Expanding inner struct
+set EXPAND_COMPLEX_TYPES=true;
+select alltypes.* from complextypes_structs where alltypes.ti = 100
+---- RESULTS
+100,12348,156789012,163234345342,True,1234.56005859375,65323423.33,2021-05-30,2021-06-01 10:19:04,'some string','another str','x','xyz','somevarcha',12345,NULL
+---- TYPES
+TINYINT, SMALLINT, INT, BIGINT, BOOLEAN, FLOAT, DOUBLE, DATE, TIMESTAMP, STRING, STRING, CHAR, CHAR, VARCHAR, DECIMAL, DECIMAL
+====
+---- QUERY
+# Expanding struct and array
+set EXPAND_COMPLEX_TYPES=true;
+select * from complextypes_structs s, complextypes_arrays a
+where s.id = a.id and s.id = 5
+---- RESULTS
+5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}',5,'[10,NULL,12]','["ten","eleven","twelve","thirteen"]'
+---- TYPES
+INT, STRING, STRING, STRING, STRING, INT, STRING, STRING
+====
+---- QUERY
+# Expanding star on tables with array and struct type, without EXPAND_COMPLEX_TYPES
+select a.*, s.*
+from complextypes_arrays a, complextypes_structs s
+where a.id = s.id and a.id = 6
+---- RESULTS
+6,6,'sixth item'
+---- TYPES
+INT, INT, STRING
+====
+---- QUERY
+# Expanding struct and array as separate aliases
+set EXPAND_COMPLEX_TYPES=true;
+select s.*, a.* from complextypes_structs s, complextypes_arrays a
+where s.id = a.id and s.id = 1
+---- RESULTS
+1,'first item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01 10:19:04","s1":"some string","s2":"another str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL',1,'[1,2,3,4,5]','["one","two","three","four","five"]'
+---- TYPES
+INT, STRING, STRING, STRING, STRING, INT, STRING, STRING
+====
+---- QUERY
+# Expanding struct, map and array as separate aliases
+set EXPAND_COMPLEX_TYPES=true;
+select s.*, a.*, m.*
+from complextypes_structs s, complextypes_arrays a, complextypes_maps_view m
+where s.id = a.id and s.id = m.id and s.id = 2
+---- RESULTS
+2,'second item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab ","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}',2,'[1,NULL,3,4,5]','["one","two","three",NULL,"five"]',2,'{"k1":2,"k2":NULL}','[{"k3":NULL,"k1":1},NULL,{}]'
+---- TYPES
+INT, STRING, STRING, STRING, STRING, INT, STRING, STRING, BIGINT, STRING, STRING
+====
+---- QUERY
+# Expanding struct, map and array as separate aliases, without EXPAND_COMPLEX_TYPES
+select s.*, a.*, m.*
+from complextypes_structs s, complextypes_arrays a, complextypes_maps_view m
+where s.id = a.id and s.id = m.id and s.id = 3
+---- RESULTS
+3,'third item',3,3
+---- TYPES
+INT, STRING, INT, BIGINT
+====
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 9b3ca59ca..4401e783f 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -877,3 +877,34 @@ class TestMaxNestingDepth(ImpalaTestSuite):
assert False, "Expected table loading to fail."
except ImpalaBeeswaxException, e:
assert "Type exceeds the maximum nesting depth" in str(e)
+
+
+class TestNestedTypesStarExpansion(ImpalaTestSuite):
+ """Functional tests for nested types when star expansion query
+ option (EXPAND_COMPLEX_TYPES) is enabled/disabled, run for all file formats that
+ support nested types."""
+
+ @classmethod
+ def get_workload(self):
+ return 'functional-query'
+
+ @classmethod
+ def add_test_dimensions(cls):
+ super(TestNestedTypesStarExpansion, cls).add_test_dimensions()
+ cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('table_format').file_format in ['parquet', 'orc'])
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('protocol') == 'hs2')
+ cls.ImpalaTestMatrix.add_dimension(
+ create_exec_option_dimension_from_dict({
+ 'disable_codegen': ['True']}))
+ cls.ImpalaTestMatrix.add_mandatory_exec_option(
+ 'convert_legacy_hive_parquet_utc_timestamps', 'true')
+ cls.ImpalaTestMatrix.add_mandatory_exec_option('TIMEZONE', '"Europe/Budapest"')
+
+ def test_star_expansion(self, vector):
+ # Queries with star (*) expression on tables with array, map
+ # and struct complex type, through views, with query option
+ # EXPAND_COMPLEX_TYPES enabled and disabled.
+ self.run_test_case('QueryTest/nested-types-star-expansion', vector)