You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by bo...@apache.org on 2019/05/28 10:26:17 UTC

[impala] 08/08: IMPALA-8369: Add HIVE_MAJOR_VERSION section to planner tests + some fixes

This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5ce57cafb20f4627054c6c1d0d2a79e9ae97fd78
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu May 23 21:49:30 2019 +0200

    IMPALA-8369: Add HIVE_MAJOR_VERSION section to planner tests + some fixes
    
    Hive 3 creates a different number of files for some tables than Hive 2,
    which broke some test cases in resource-requirements.test. The fix
    is to run different versions of these tests depending on the Hive version.
    
    This is done by adding a new section, HIVE_MAJOR_VERSION, which causes
    the given test case to be skipped if the cluster's Hive major version
    differs from the one specified, e.g.:
    ---- HIVE_MAJOR_VERSION
    3
    
    Change-Id: Ied7ba7911da23cbca12149e062f4e1a444613a36
    Reviewed-on: http://gerrit.cloudera.org:8080/13414
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
---
 .../org/apache/impala/planner/PlannerTestBase.java |  12 ++
 .../org/apache/impala/testutil/TestFileParser.java |   3 +-
 .../queries/PlannerTest/resource-requirements.test | 140 +++++++++++++++++++++
 3 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
index dc420f8..2917b08 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTestBase.java
@@ -876,6 +876,18 @@ public class PlannerTestBase extends FrontendTestBase {
     for (TestCase testCase : queryFileParser.getTestCases()) {
       actualOutput.append(testCase.getSectionAsString(Section.QUERY, true, "\n"));
       actualOutput.append("\n");
+
+      String neededHiveMajorVersion =
+          testCase.getSectionAsString(Section.HIVE_MAJOR_VERSION, false, "");
+      if (neededHiveMajorVersion != null && !neededHiveMajorVersion.isEmpty() &&
+          Integer.parseInt(neededHiveMajorVersion) != TestUtils.getHiveMajorVersion()) {
+        actualOutput.append("Skipping test case (needs Hive major version: ");
+        actualOutput.append(neededHiveMajorVersion);
+        actualOutput.append(")\n");
+        actualOutput.append("====\n");
+        continue;
+      }
+
       String queryOptionsSection = testCase.getSectionAsString(
           Section.QUERYOPTIONS, true, "\n");
       if (queryOptionsSection != null && !queryOptionsSection.isEmpty()) {
diff --git a/fe/src/test/java/org/apache/impala/testutil/TestFileParser.java b/fe/src/test/java/org/apache/impala/testutil/TestFileParser.java
index fb04704..c583de8 100644
--- a/fe/src/test/java/org/apache/impala/testutil/TestFileParser.java
+++ b/fe/src/test/java/org/apache/impala/testutil/TestFileParser.java
@@ -95,7 +95,8 @@ public class TestFileParser {
     ERRORS,
     SCANRANGELOCATIONS,
     LINEAGE,
-    QUERYOPTIONS;
+    QUERYOPTIONS,
+    HIVE_MAJOR_VERSION;
 
     // Return header line for this section
     public String getHeader() {
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
index fb06170..5faf6d3 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
@@ -1102,6 +1102,9 @@ Per-Host Resources: mem-estimate=32.00MB mem-reservation=64.00KB thread-reservat
 ====
 # Avro scan.
 select * from tpch_avro.orders
+---- HIVE_MAJOR_VERSION
+# Hive 3 creates different number of files for this table than Hive 2.
+2
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
 Per-Host Resource Estimates: Memory=88MB
@@ -1180,6 +1183,88 @@ Per-Host Resources: mem-estimate=176.00MB mem-reservation=16.00MB thread-reserva
    tuple-ids=0 row-size=88B cardinality=unavailable
    in pipelines: 00(GETNEXT)
 ====
+select * from tpch_avro.orders
+---- HIVE_MAJOR_VERSION
+# Hive 3 creates different number of files for this table than Hive 2.
+3
+---- PLAN
+Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
+Per-Host Resource Estimates: Memory=64MB
+WARNING: The following tables are missing relevant table and/or column statistics.
+tpch_avro.orders
+Analyzed query: SELECT * FROM tpch_avro.orders
+
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=64.00MB mem-reservation=8.00MB thread-reservation=2
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+00:SCAN HDFS [tpch_avro.orders]
+   HDFS partitions=1/1 files=3 size=156.92MB
+   stored statistics:
+     table: rows=unavailable size=156.92MB
+     columns: unavailable
+   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   mem-estimate=64.00MB mem-reservation=8.00MB thread-reservation=1
+   tuple-ids=0 row-size=88B cardinality=unavailable
+   in pipelines: 00(GETNEXT)
+---- DISTRIBUTEDPLAN
+Max Per-Host Resource Reservation: Memory=8.00MB Threads=3
+Per-Host Resource Estimates: Memory=64MB
+WARNING: The following tables are missing relevant table and/or column statistics.
+tpch_avro.orders
+Analyzed query: SELECT * FROM tpch_avro.orders
+
+F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=275.97KB mem-reservation=0B thread-reservation=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+01:EXCHANGE [UNPARTITIONED]
+|  mem-estimate=275.97KB mem-reservation=0B thread-reservation=0
+|  tuple-ids=0 row-size=88B cardinality=unavailable
+|  in pipelines: 00(GETNEXT)
+|
+F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
+Per-Host Resources: mem-estimate=64.00MB mem-reservation=8.00MB thread-reservation=2
+00:SCAN HDFS [tpch_avro.orders, RANDOM]
+   HDFS partitions=1/1 files=3 size=156.92MB
+   stored statistics:
+     table: rows=unavailable size=156.92MB
+     columns: unavailable
+   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   mem-estimate=64.00MB mem-reservation=8.00MB thread-reservation=1
+   tuple-ids=0 row-size=88B cardinality=unavailable
+   in pipelines: 00(GETNEXT)
+---- PARALLELPLANS
+Max Per-Host Resource Reservation: Memory=16.00MB Threads=5
+Per-Host Resource Estimates: Memory=129MB
+WARNING: The following tables are missing relevant table and/or column statistics.
+tpch_avro.orders
+Analyzed query: SELECT * FROM tpch_avro.orders
+
+F01:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=551.97KB mem-reservation=0B thread-reservation=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+01:EXCHANGE [UNPARTITIONED]
+|  mem-estimate=551.97KB mem-reservation=0B thread-reservation=0
+|  tuple-ids=0 row-size=88B cardinality=unavailable
+|  in pipelines: 00(GETNEXT)
+|
+F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
+Per-Host Resources: mem-estimate=128.00MB mem-reservation=16.00MB thread-reservation=4
+00:SCAN HDFS [tpch_avro.orders, RANDOM]
+   HDFS partitions=1/1 files=3 size=156.92MB
+   stored statistics:
+     table: rows=unavailable size=156.92MB
+     columns: unavailable
+   extrapolated-rows=disabled max-scan-range-rows=unavailable
+   mem-estimate=64.00MB mem-reservation=8.00MB thread-reservation=1
+   tuple-ids=0 row-size=88B cardinality=unavailable
+   in pipelines: 00(GETNEXT)
+====
 # RC scan.
 select * from tpch_rc.customer
 ---- PLAN
@@ -1342,6 +1427,9 @@ Per-Host Resources: mem-estimate=32.00MB mem-reservation=16.00MB thread-reservat
 ====
 # ORC scan
 select * from tpch_orc_def.lineitem
+---- HIVE_MAJOR_VERSION
+# Hive 3 creates different number of files for this table than Hive 2.
+2
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
 Per-Host Resource Estimates: Memory=40MB
@@ -1362,8 +1450,36 @@ PLAN-ROOT SINK
    tuple-ids=0 row-size=231B cardinality=6.00M
    in pipelines: 00(GETNEXT)
 ====
+# ORC scan
+select * from tpch_orc_def.lineitem
+---- HIVE_MAJOR_VERSION
+# Hive 3 creates different number of files for this table than Hive 2.
+3
+---- PLAN
+Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
+Per-Host Resource Estimates: Memory=88MB
+Analyzed query: SELECT * FROM tpch_orc_def.lineitem
+
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=88.00MB mem-reservation=8.00MB thread-reservation=2
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+00:SCAN HDFS [tpch_orc_def.lineitem]
+   HDFS partitions=1/1 files=1 size=142.84MB
+   stored statistics:
+     table: rows=6.00M size=142.84MB
+     columns: all
+   extrapolated-rows=disabled max-scan-range-rows=6.00M
+   mem-estimate=88.00MB mem-reservation=8.00MB thread-reservation=1
+   tuple-ids=0 row-size=231B cardinality=6.00M
+   in pipelines: 00(GETNEXT)
+====
 # Single column ORC scan - memory reservation is same as multi-column scan.
 select l_comment from tpch_orc_def.lineitem
+---- HIVE_MAJOR_VERSION
+# Hive 3 creates different number of files for this table than Hive 2.
+2
 ---- PLAN
 Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
 Per-Host Resource Estimates: Memory=40MB
@@ -1384,6 +1500,30 @@ PLAN-ROOT SINK
    tuple-ids=0 row-size=38B cardinality=6.00M
    in pipelines: 00(GETNEXT)
 ====
+select l_comment from tpch_orc_def.lineitem
+---- HIVE_MAJOR_VERSION
+# Hive 3 creates different number of files for this table than Hive 2.
+3
+---- PLAN
+Max Per-Host Resource Reservation: Memory=8.00MB Threads=2
+Per-Host Resource Estimates: Memory=88MB
+Analyzed query: SELECT l_comment FROM tpch_orc_def.lineitem
+
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+|  Per-Host Resources: mem-estimate=88.00MB mem-reservation=8.00MB thread-reservation=2
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B thread-reservation=0
+|
+00:SCAN HDFS [tpch_orc_def.lineitem]
+   HDFS partitions=1/1 files=1 size=142.84MB
+   stored statistics:
+     table: rows=6.00M size=142.84MB
+     columns: all
+   extrapolated-rows=disabled max-scan-range-rows=6.00M
+   mem-estimate=88.00MB mem-reservation=8.00MB thread-reservation=1
+   tuple-ids=0 row-size=38B cardinality=6.00M
+   in pipelines: 00(GETNEXT)
+====
 # ORC scan on small files - memory reservation is reduced.
 select * from functional_orc_def.alltypes
 ---- PLAN