You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/02/14 23:21:05 UTC

[1/5] impala git commit: IMPALA-6511: Fix state machine in FIS::UpdateState()

Repository: impala
Updated Branches:
  refs/heads/2.x 3d7d8209e -> 09962ad9d


IMPALA-6511: Fix state machine in FIS::UpdateState()

LAST_BATCH_SENT must always happen in state PRODUCING_DATA. This change
also marks "Open Finished" when receiving WAITING_FOR_FIRST_BATCH.

Change-Id: I7304205d245289cc3d7ca2217e212c844ee75e7b
Reviewed-on: http://gerrit.cloudera.org:8080/9294
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/fd067e43
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/fd067e43
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/fd067e43

Branch: refs/heads/2.x
Commit: fd067e43e7fa2954d6801ad8762c9d611add58b0
Parents: 3d7d820
Author: Lars Volker <lv...@cloudera.com>
Authored: Mon Feb 12 18:27:32 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 14 00:53:39 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/fragment-instance-state.cc | 7 ++-----
 tests/query_test/test_observability.py    | 1 +
 2 files changed, 3 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/fd067e43/be/src/runtime/fragment-instance-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/fragment-instance-state.cc b/be/src/runtime/fragment-instance-state.cc
index f36c91d..91b33a7 100644
--- a/be/src/runtime/fragment-instance-state.cc
+++ b/be/src/runtime/fragment-instance-state.cc
@@ -407,6 +407,7 @@ void FragmentInstanceState::UpdateState(const StateEvent event)
 
     case StateEvent::WAITING_FOR_FIRST_BATCH:
       DCHECK_EQ(current_state, TFInstanceExecState::WAITING_FOR_OPEN);
+      event_sequence_->MarkEvent("Open Finished");
       next_state = TFInstanceExecState::WAITING_FOR_FIRST_BATCH;
       break;
 
@@ -429,11 +430,7 @@ void FragmentInstanceState::UpdateState(const StateEvent event)
       break;
 
     case StateEvent::LAST_BATCH_SENT:
-      if (UNLIKELY(current_state == TFInstanceExecState::WAITING_FOR_OPEN)) {
-        event_sequence_->MarkEvent("Open Finished");
-      } else {
-        DCHECK_EQ(current_state, TFInstanceExecState::PRODUCING_DATA);
-      }
+      DCHECK_EQ(current_state, TFInstanceExecState::PRODUCING_DATA);
       next_state = TFInstanceExecState::LAST_BATCH_SENT;
       break;
 

http://git-wip-us.apache.org/repos/asf/impala/blob/fd067e43/tests/query_test/test_observability.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index 9c0bb65..10ef6b2 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -189,6 +189,7 @@ class TestObservability(ImpalaTestSuite):
     instances, even when there are errors."""
     events = ["Fragment Instance Lifecycle Event Timeline",
               "Prepare Finished",
+              "Open Finished",
               "First Batch Produced",
               "First Batch Sent",
               "ExecInternal Finished"]


[5/5] impala git commit: IMPALA-5269: Fix issue with final line of query followed by a comment

Posted by ta...@apache.org.
IMPALA-5269: Fix issue with final line of query followed by a comment

The patch is to remove any comments in a statement when checking if a
statement ends with a semicolon delimiter.

For example:

Before (semicolon delimiter is needed at the end):
select 1 + 1; -- comment\n;

After (semicolon delimiter is no longer needed):
select 1 + 1; -- comment

Testing:
- Ran end-to-end tests in shell

Change-Id: I54f9a8f65214023520eaa010fc462a663d02d258
Reviewed-on: http://gerrit.cloudera.org:8080/9191
Reviewed-by: Fredy Wijaya <fw...@cloudera.com>
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/09962ad9
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/09962ad9
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/09962ad9

Branch: refs/heads/2.x
Commit: 09962ad9dbe33e54b57b5e0e2fb6a471c6c53021
Parents: 889e08d
Author: Fredy Wijaya <fw...@cloudera.com>
Authored: Fri Feb 2 02:42:45 2018 -0600
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 14 08:09:59 2018 +0000

----------------------------------------------------------------------
 shell/impala_shell.py                 |  4 +++
 tests/shell/test_shell_interactive.py | 39 ++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/09962ad9/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 93e10fc..2aca26b 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -385,6 +385,10 @@ class ImpalaShell(object, cmd.Cmd):
     not considered terminated. If no open quotation is found, it's considered
     terminated.
     """
+    # Strip any comments to make a statement such as the following be considered as
+    # ending with a delimiter:
+    # select 1 + 1; -- this is a comment
+    line = sqlparse.format(line, strip_comments=True).rstrip()
     if line.endswith(ImpalaShell.CMD_DELIM):
       try:
         # Look for an open quotation in the entire command, and not just the

http://git-wip-us.apache.org/repos/asf/impala/blob/09962ad9/tests/shell/test_shell_interactive.py
----------------------------------------------------------------------
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index a104809..67da3b5 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -434,6 +434,45 @@ class TestImpalaShellInteractive(object):
     finally:
       os.chdir(cwd)
 
+  @pytest.mark.execute_serially
+  def test_line_ends_with_comment(self):
+    # IMPALA-5269: Test lines that end with a comment.
+    queries = ['select 1 + 1; --comment',
+               'select 1 + 1 --comment\n;']
+    for query in queries:
+      result = run_impala_shell_interactive(query)
+      assert '| 1 + 1 |' in result.stdout
+      assert '| 2     |' in result.stdout
+
+    queries = ['select \'some string\'; --comment',
+               'select \'some string\' --comment\n;']
+    for query in queries:
+      result = run_impala_shell_interactive(query)
+      assert '| \'some string\' |' in result.stdout
+      assert '| some string   |' in result.stdout
+
+    queries = ['select "--"; -- "--"',
+               'select \'--\'; -- "--"',
+               'select "--" -- "--"\n;',
+               'select \'--\' -- "--"\n;']
+    for query in queries:
+      result = run_impala_shell_interactive(query)
+      assert '| \'--\' |' in result.stdout
+      assert '| --   |' in result.stdout
+
+    query = ('select * from (\n' +
+             'select count(*) from functional.alltypes\n' +
+             ') v; -- Incomplete SQL statement in this line')
+    result = run_impala_shell_interactive(query)
+    assert '| count(*) |' in result.stdout
+
+    query = ('select id from functional.alltypes\n' +
+             'order by id; /*\n' +
+             '* Multi-line comment\n' +
+             '*/')
+    result = run_impala_shell_interactive(query)
+    assert '| id   |' in result.stdout
+
 def run_impala_shell_interactive(input_lines, shell_args=None):
   """Runs a command in the Impala shell interactively."""
   # if argument "input_lines" is a string, makes it into a list


[3/5] impala git commit: IMPALA-6516: Log catalog update only if the catalog version changes

Posted by ta...@apache.org.
IMPALA-6516: Log catalog update only if the catalog version changes

Impalad writes a log line whenever a statestore catalog update comes
in. This patch removes the logging when the catalog version doesn't
change.

Change-Id: I04b8dd05c588d4cd91e9ca2251f8f66325bb45e2
Reviewed-on: http://gerrit.cloudera.org:8080/9311
Reviewed-by: Michael Ho <kw...@cloudera.com>
Reviewed-by: anujphadke <ap...@cloudera.com>
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/185c78fd
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/185c78fd
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/185c78fd

Branch: refs/heads/2.x
Commit: 185c78fd45fc643ccf1e6c989babda39dcaa40b9
Parents: 85c3c4d
Author: Tianyi Wang <tw...@cloudera.com>
Authored: Tue Feb 13 13:21:35 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 14 08:09:59 2018 +0000

----------------------------------------------------------------------
 be/src/service/impala-server.cc | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/185c78fd/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index ee8405b..cff53e7 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -1352,13 +1352,15 @@ void ImpalaServer::CatalogUpdateCallback(
   } else {
     {
       unique_lock<mutex> unique_lock(catalog_version_lock_);
+      if (catalog_update_info_.catalog_version != resp.new_catalog_version) {
+        LOG(INFO) << "Catalog topic update applied with version: " <<
+            resp.new_catalog_version << " new min catalog object version: " <<
+            resp.min_catalog_object_version;
+      }
       catalog_update_info_.catalog_version = resp.new_catalog_version;
       catalog_update_info_.catalog_topic_version = delta.to_version;
       catalog_update_info_.catalog_service_id = resp.catalog_service_id;
       catalog_update_info_.min_catalog_object_version = resp.min_catalog_object_version;
-      LOG(INFO) << "Catalog topic update applied with version: " <<
-          resp.new_catalog_version << " new min catalog object version: " <<
-          resp.min_catalog_object_version;
       catalog_update_info_.UpdateCatalogVersionMetrics();
     }
     ImpaladMetrics::CATALOG_READY->SetValue(resp.new_catalog_version > 0);


[4/5] impala git commit: Use unqualified table refs in TPCH planner tests.

Posted by ta...@apache.org.
Use unqualified table refs in TPCH planner tests.

There were a few places where we accidentally used fully-qualified
table references. As a result, the testTpchViews() test did not
exactly cover what was intended.

Change-Id: I886c451ab61a1739af96eeb765821dfd8e951b07
Reviewed-on: http://gerrit.cloudera.org:8080/9270
Reviewed-by: Taras Bobrovytsky <tb...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/889e08d3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/889e08d3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/889e08d3

Branch: refs/heads/2.x
Commit: 889e08d3afae48007d57683e2ebcf0e9012d3314
Parents: 185c78f
Author: Alex Behm <al...@cloudera.com>
Authored: Fri Feb 9 11:18:12 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 14 08:09:59 2018 +0000

----------------------------------------------------------------------
 .../queries/PlannerTest/tpch-all.test           | 12 ++---
 .../queries/PlannerTest/tpch-views.test         | 50 ++++++++++----------
 2 files changed, 31 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/889e08d3/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
index da2c745..d4a7d55 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test
@@ -108,10 +108,10 @@ where
     select
       min(ps_supplycost)
     from
-      tpch.partsupp,
-      tpch.supplier,
-      tpch.nation,
-      tpch.region
+      partsupp,
+      supplier,
+      nation,
+      region
     where
       p_partkey = ps_partkey
       and s_suppkey = ps_suppkey
@@ -2460,7 +2460,7 @@ from (
     c_custkey,
     count(o_orderkey) as c_count
   from
-    customer left outer join tpch.orders on (
+    customer left outer join orders on (
       c_custkey = o_custkey
       and o_comment not like '%special%requests%'
     )
@@ -3749,7 +3749,7 @@ from
   supplier,
   lineitem l1,
   orders,
-  tpch.nation
+  nation
 where
   s_suppkey = l1.l_suppkey
   and o_orderkey = l1.l_orderkey

http://git-wip-us.apache.org/repos/asf/impala/blob/889e08d3/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
index 5bb8828..52af979 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test
@@ -64,10 +64,10 @@ where
     select
       min(ps_supplycost)
     from
-      tpch.partsupp,
-      tpch.supplier,
-      tpch.nation,
-      tpch.region
+      partsupp,
+      supplier,
+      nation,
+      region
     where
       p_partkey = ps_partkey
       and s_suppkey = ps_suppkey
@@ -128,36 +128,36 @@ PLAN-ROOT SINK
 |     runtime filters: RF012 -> tpch.supplier.s_nationkey, RF014 -> tpch.supplier.s_suppkey
 |
 12:AGGREGATE [FINALIZE]
-|  output: min(ps_supplycost)
-|  group by: ps_partkey
+|  output: min(tpch.partsupp.ps_supplycost)
+|  group by: tpch.partsupp.ps_partkey
 |
 11:HASH JOIN [INNER JOIN]
-|  hash predicates: n_regionkey = r_regionkey
-|  runtime filters: RF004 <- r_regionkey
+|  hash predicates: tpch.nation.n_regionkey = tpch.region.r_regionkey
+|  runtime filters: RF004 <- tpch.region.r_regionkey
 |
 |--08:SCAN HDFS [tpch.region]
 |     partitions=1/1 files=1 size=384B
-|     predicates: r_name = 'EUROPE'
+|     predicates: tpch.region.r_name = 'EUROPE'
 |
 10:HASH JOIN [INNER JOIN]
-|  hash predicates: s_nationkey = n_nationkey
-|  runtime filters: RF006 <- n_nationkey
+|  hash predicates: tpch.supplier.s_nationkey = tpch.nation.n_nationkey
+|  runtime filters: RF006 <- tpch.nation.n_nationkey
 |
 |--07:SCAN HDFS [tpch.nation]
 |     partitions=1/1 files=1 size=2.15KB
-|     runtime filters: RF004 -> n_regionkey
+|     runtime filters: RF004 -> tpch.nation.n_regionkey
 |
 09:HASH JOIN [INNER JOIN]
-|  hash predicates: ps_suppkey = s_suppkey
-|  runtime filters: RF008 <- s_suppkey
+|  hash predicates: tpch.partsupp.ps_suppkey = tpch.supplier.s_suppkey
+|  runtime filters: RF008 <- tpch.supplier.s_suppkey
 |
 |--06:SCAN HDFS [tpch.supplier]
 |     partitions=1/1 files=1 size=1.33MB
-|     runtime filters: RF006 -> s_nationkey
+|     runtime filters: RF006 -> tpch.supplier.s_nationkey
 |
 05:SCAN HDFS [tpch.partsupp]
    partitions=1/1 files=1 size=112.71MB
-   runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF008 -> ps_suppkey
+   runtime filters: RF000 -> tpch.partsupp.ps_partkey, RF008 -> tpch.partsupp.ps_suppkey
 ====
 # TPCH-Q3
 # Q3 - Shipping Priority Query
@@ -878,7 +878,7 @@ from (
     c_custkey,
     count(o_orderkey) as c_count
   from
-    customer left outer join tpch.orders on (
+    customer left outer join orders on (
       c_custkey = o_custkey
       and o_comment not like '%special%requests%'
     )
@@ -901,11 +901,11 @@ PLAN-ROOT SINK
 |  group by: count(o_orderkey)
 |
 03:AGGREGATE [FINALIZE]
-|  output: count(o_orderkey)
+|  output: count(tpch.orders.o_orderkey)
 |  group by: tpch.customer.c_custkey
 |
 02:HASH JOIN [RIGHT OUTER JOIN]
-|  hash predicates: o_custkey = tpch.customer.c_custkey
+|  hash predicates: tpch.orders.o_custkey = tpch.customer.c_custkey
 |  runtime filters: RF000 <- tpch.customer.c_custkey
 |
 |--00:SCAN HDFS [tpch.customer]
@@ -913,8 +913,8 @@ PLAN-ROOT SINK
 |
 01:SCAN HDFS [tpch.orders]
    partitions=1/1 files=1 size=162.56MB
-   predicates: NOT o_comment LIKE '%special%requests%'
-   runtime filters: RF000 -> o_custkey
+   predicates: NOT tpch.orders.o_comment LIKE '%special%requests%'
+   runtime filters: RF000 -> tpch.orders.o_custkey
 ====
 # TPCH-Q14
 # Q14 - Promotion Effect
@@ -1361,7 +1361,7 @@ from
   supplier,
   lineitem l1,
   orders,
-  tpch.nation
+  nation
 where
   s_suppkey = l1.l_suppkey
   and o_orderkey = l1.l_orderkey
@@ -1414,12 +1414,12 @@ PLAN-ROOT SINK
 |  |  runtime filters: RF000 <- tpch.lineitem.l_orderkey
 |  |
 |  |--08:HASH JOIN [INNER JOIN]
-|  |  |  hash predicates: tpch.supplier.s_nationkey = n_nationkey
-|  |  |  runtime filters: RF002 <- n_nationkey
+|  |  |  hash predicates: tpch.supplier.s_nationkey = tpch.nation.n_nationkey
+|  |  |  runtime filters: RF002 <- tpch.nation.n_nationkey
 |  |  |
 |  |  |--03:SCAN HDFS [tpch.nation]
 |  |  |     partitions=1/1 files=1 size=2.15KB
-|  |  |     predicates: n_name = 'SAUDI ARABIA'
+|  |  |     predicates: tpch.nation.n_name = 'SAUDI ARABIA'
 |  |  |
 |  |  07:HASH JOIN [INNER JOIN]
 |  |  |  hash predicates: tpch.lineitem.l_suppkey = tpch.supplier.s_suppkey


[2/5] impala git commit: KUDU-2296: Fix deserialization of messages larger than 64MB

Posted by ta...@apache.org.
KUDU-2296: Fix deserialization of messages larger than 64MB

Protobuf's CodedInputStream has a 64MB total byte limit by
default. When trying to deserialize messages larger than
this, ParseMessage() hits this limit and mistakenly
thinks that the packet is too short. This issue is dormant
due to Kudu's default rpc_max_message_size of 50MB.
However, Impala will be using a larger value for
rpc_max_message_size and requires this fix.

The fix is to override the default 64MB limit by calling
CodedInputStream::SetTotalBytesLimit() with the buffer's
size.

Change-Id: I57d3f3ca6ec0aa8be0e67e6a13c4b560c9d2c63a
Reviewed-on: http://gerrit.cloudera.org:8080/9312
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
Reviewed-on: http://gerrit.cloudera.org:8080/9313
Reviewed-by: Joe McDonnell <jo...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/85c3c4d1
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/85c3c4d1
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/85c3c4d1

Branch: refs/heads/2.x
Commit: 85c3c4d184d3998e5ffcd905ed3f7a3898fe11ff
Parents: fd067e4
Author: Joe McDonnell <jo...@cloudera.com>
Authored: Tue Feb 13 14:43:19 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Feb 14 04:37:41 2018 +0000

----------------------------------------------------------------------
 be/src/kudu/rpc/serialization.cc | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/85c3c4d1/be/src/kudu/rpc/serialization.cc
----------------------------------------------------------------------
diff --git a/be/src/kudu/rpc/serialization.cc b/be/src/kudu/rpc/serialization.cc
index dbb0fc5..fbc05bc 100644
--- a/be/src/kudu/rpc/serialization.cc
+++ b/be/src/kudu/rpc/serialization.cc
@@ -114,6 +114,10 @@ Status ParseMessage(const Slice& buf,
     << "Got mis-sized buffer: " << KUDU_REDACT(buf.ToDebugString());
 
   CodedInputStream in(buf.data(), buf.size());
+  // Protobuf enforces a 64MB total bytes limit on CodedInputStream by default.
+  // Override this default with the actual size of the buffer to allow messages
+  // larger than 64MB.
+  in.SetTotalBytesLimit(buf.size(), -1);
   in.Skip(kMsgLengthPrefixLength);
 
   uint32_t header_len;