You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/08/06 16:39:14 UTC

[impala] 02/02: IMPALA-8771: Missing stats warning for complex type columns

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 227b839e4e71778b74b045331682317e29014c7c
Author: Tamas Mate <tm...@cloudera.com>
AuthorDate: Wed Jul 31 16:20:06 2019 +0200

    IMPALA-8771: Missing stats warning for complex type columns
    
    An extra condition is added to the table stats check so that complex
    type columns are skipped and cannot trigger the missing stats
    warning.
    
    Change-Id: Ia1b5c14da0c7f6eab373d80b2dbf7c974b2eb567
    Reviewed-on: http://gerrit.cloudera.org:8080/13965
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Tim Armstrong <ta...@cloudera.com>
---
 .../java/org/apache/impala/planner/ScanNode.java   |  8 +++++++-
 .../queries/PlannerTest/resource-requirements.test | 24 ----------------------
 .../compute-stats-complextype-warning.test         | 17 +++++++++++++++
 .../queries/QueryTest/show-stats.test              |  5 +++--
 tests/metadata/test_compute_stats.py               |  5 +++++
 5 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/ScanNode.java b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
index 78c24af..78f3510 100644
--- a/fe/src/main/java/org/apache/impala/planner/ScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/ScanNode.java
@@ -180,9 +180,15 @@ abstract public class ScanNode extends PlanNode {
     return false;
   }
 
+  /**
+   * Returns true if any column is missing stats; complex type columns are skipped.
+   */
   public boolean isTableMissingColumnStats() {
     for (SlotDescriptor slot: desc_.getSlots()) {
-      if (slot.getColumn() != null && !slot.getStats().hasStats()) return true;
+      if (slot.getColumn() != null && !slot.getStats().hasStats() &&
+          !slot.getColumn().getType().isComplexType()) {
+        return true;
+      }
     }
     return false;
   }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
index 5faf6d3..25abffb 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
@@ -554,8 +554,6 @@ from tpch_nested_parquet.customer c, c.c_orders
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=32.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT c_custkey, o_orderkey, o_orderstatus, o_totalprice,
 o_orderdate, o_orderpriority, o_clerk FROM tpch_nested_parquet.customer c,
 c.c_orders
@@ -605,8 +603,6 @@ from tpch_nested_parquet.customer c, c.c_orders
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT c_custkey, o_orderkey, pos FROM
 tpch_nested_parquet.customer c, c.c_orders
 
@@ -656,8 +652,6 @@ from tpch_nested_parquet.customer c, c.c_orders
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT c_custkey, pos FROM tpch_nested_parquet.customer c,
 c.c_orders
 
@@ -707,8 +701,6 @@ from tpch_nested_parquet.customer c, c.c_orders
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT c_custkey FROM tpch_nested_parquet.customer c, c.c_orders
 
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
@@ -756,8 +748,6 @@ from tpch_nested_parquet.customer c, c.c_orders
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=4.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT o_orderkey FROM tpch_nested_parquet.customer c,
 c.c_orders
 
@@ -806,8 +796,6 @@ from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=16.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT c_custkey, o_orderkey, l_comment FROM
 tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
 
@@ -4521,8 +4509,6 @@ from tpch_nested_parquet.customer c,
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=85.94MB Threads=2
 Per-Host Resource Estimates: Memory=346MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT DISTINCT c_name, v.o_orderkey, v.o_orderstatus FROM
 tpch_nested_parquet.customer c, (SELECT DISTINCT o1.o_orderkey, o2.o_orderstatus
 FROM c.c_orders o1 INNER JOIN c.c_orders o2 ON o1.o_orderkey = o2.o_orderkey
@@ -4598,8 +4584,6 @@ PLAN-ROOT SINK
 ---- DISTRIBUTEDPLAN
 Max Per-Host Resource Reservation: Memory=119.94MB Threads=4
 Per-Host Resource Estimates: Memory=494MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT DISTINCT c_name, v.o_orderkey, v.o_orderstatus FROM
 tpch_nested_parquet.customer c, (SELECT DISTINCT o1.o_orderkey, o2.o_orderstatus
 FROM c.c_orders o1 INNER JOIN c.c_orders o2 ON o1.o_orderkey = o2.o_orderkey
@@ -4695,8 +4679,6 @@ Per-Host Resources: mem-estimate=345.94MB mem-reservation=85.94MB thread-reserva
 ---- PARALLELPLANS
 Max Per-Host Resource Reservation: Memory=239.88MB Threads=5
 Per-Host Resource Estimates: Memory=979MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT DISTINCT c_name, v.o_orderkey, v.o_orderstatus FROM
 tpch_nested_parquet.customer c, (SELECT DISTINCT o1.o_orderkey, o2.o_orderstatus
 FROM c.c_orders o1 INNER JOIN c.c_orders o2 ON o1.o_orderkey = o2.o_orderkey
@@ -4801,8 +4783,6 @@ from tpch_nested_parquet.customer c,
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=104.00MB Threads=2
 Per-Host Resource Estimates: Memory=136MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT * FROM tpch_nested_parquet.customer c, (SELECT *,
 row_number() OVER (ORDER BY o_totalprice ASC) rnum_price, row_number() OVER
 (ORDER BY o_orderdate ASC) rnum_date, row_number() OVER (ORDER BY
@@ -4889,8 +4869,6 @@ PLAN-ROOT SINK
 ---- DISTRIBUTEDPLAN
 Max Per-Host Resource Reservation: Memory=104.00MB Threads=3
 Per-Host Resource Estimates: Memory=147MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT * FROM tpch_nested_parquet.customer c, (SELECT *,
 row_number() OVER (ORDER BY o_totalprice ASC) rnum_price, row_number() OVER
 (ORDER BY o_orderdate ASC) rnum_date, row_number() OVER (ORDER BY
@@ -4984,8 +4962,6 @@ Per-Host Resources: mem-estimate=136.00MB mem-reservation=104.00MB thread-reserv
 ---- PARALLELPLANS
 Max Per-Host Resource Reservation: Memory=208.00MB Threads=3
 Per-Host Resource Estimates: Memory=284MB
-WARNING: The following tables are missing relevant table and/or column statistics.
-tpch_nested_parquet.customer
 Analyzed query: SELECT * FROM tpch_nested_parquet.customer c, (SELECT *,
 row_number() OVER (ORDER BY o_totalprice ASC) rnum_price, row_number() OVER
 (ORDER BY o_orderdate ASC) rnum_date, row_number() OVER (ORDER BY
diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats-complextype-warning.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-complextype-warning.test
new file mode 100644
index 0000000..468000f
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats-complextype-warning.test
@@ -0,0 +1,17 @@
+====
+---- QUERY
+# The missing stats warning should appear when stats are not available;
+# in this case it is due to missing table stats.
+create table if not exists complex_collection (list ARRAY < STRING >);
+explain select count(*) from complex_collection c, c.list;
+---- RESULTS: VERIFY_IS_SUBSET
+'WARNING: The following tables are missing relevant table and/or column statistics.'
+====
+---- QUERY
+# Although stats are not available for complex type columns, the missing stats
+# warning should not appear once the table stats have been computed.
+compute stats complex_collection;
+explain select count(*) from complex_collection c, c.list;
+---- RESULTS: VERIFY_IS_NOT_IN
+'WARNING: The following tables are missing relevant table and/or column statistics.'
+====
\ No newline at end of file
diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
index d820a8c..73cdfb1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
@@ -148,7 +148,8 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
 ====
 ---- QUERY
-# Column column stats for a table with complex types.
+# Column stats for a table with complex types.
+# TODO: when complex type stats are supported, revisit compute-stats-complextype-warning.test
 show column stats functional.allcomplextypes
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
@@ -170,4 +171,4 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 'month','INT',0,0,4,4
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE
-====
\ No newline at end of file
+====
diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py
index ec81909..70e99f5 100644
--- a/tests/metadata/test_compute_stats.py
+++ b/tests/metadata/test_compute_stats.py
@@ -81,6 +81,11 @@ class TestComputeStats(ImpalaTestSuite):
   def test_compute_stats_incremental(self, vector, unique_database):
     self.run_test_case('QueryTest/compute-stats-incremental', vector, unique_database)
 
+  @SkipIfS3.eventually_consistent
+  def test_compute_stats_complextype_warning(self, vector, unique_database):
+    self.run_test_case('QueryTest/compute-stats-complextype-warning', vector,
+        unique_database)
+
   @pytest.mark.execute_serially
   @SkipIfS3.eventually_consistent
   def test_compute_stats_many_partitions(self, vector):