You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2017/07/22 21:09:44 UTC

[1/4] incubator-impala git commit: IMPALA-3937: Deprecate --be_service_threads

Repository: incubator-impala
Updated Branches:
  refs/heads/master ad0c6e749 -> 304edb28c


IMPALA-3937: Deprecate --be_service_threads

FLAGS_be_service_threads does nothing, and can be removed. Backend
Thrift servers do not use a fixed-size thread pool, instead using one
thread-per-connection.

Change-Id: I10e48014f24eebd22251bac4734bc3c90dee47c0
Reviewed-on: http://gerrit.cloudera.org:8080/7483
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ed732443
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ed732443
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ed732443

Branch: refs/heads/master
Commit: ed7324431d16a37a279d730a036197fc9019c3ce
Parents: ad0c6e7
Author: Henry Robinson <he...@cloudera.com>
Authored: Fri Jul 21 16:20:55 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 03:44:01 2017 +0000

----------------------------------------------------------------------
 be/src/service/impala-server.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed732443/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index a26f0ab..c3d22fe 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -114,8 +114,8 @@ DEFINE_int32(hs2_port, 21050, "port on which HiveServer2 client requests are ser
 
 DEFINE_int32(fe_service_threads, 64,
     "number of threads available to serve client requests");
-DEFINE_int32(be_service_threads, 64,
-    "(Advanced) number of threads available to serve backend execution requests");
+DEFINE_int32_hidden(be_service_threads, 64,
+    "Deprecated, no longer has any effect. Will be removed in Impala 3.0.");
 DEFINE_string(default_query_options, "", "key=value pair of default query options for"
     " impalad, separated by ','");
 DEFINE_int32(query_log_size, 25, "Number of queries to retain in the query log. If -1, "
@@ -1934,7 +1934,7 @@ Status CreateImpalaServer(ExecEnv* exec_env, int beeswax_port, int hs2_port, int
     be_processor->setEventHandler(event_handler);
 
     *be_server = new ThriftServer("backend", be_processor, be_port, nullptr,
-        exec_env->metrics(), FLAGS_be_service_threads);
+        exec_env->metrics());
     if (EnableInternalSslConnections()) {
       LOG(INFO) << "Enabling SSL for backend";
       RETURN_IF_ERROR((*be_server)->EnableSsl(FLAGS_ssl_server_certificate,


[2/4] incubator-impala git commit: IMPALA-5688: Reduce run-time of a couple of expr-test heavy-hitters

Posted by he...@apache.org.
IMPALA-5688: Reduce run-time of a couple of expr-test heavy-hitters

Two tests (LongReverse and the base64 tests in StringFunctions)
run their tests over all lengths from 0..{{some length}}. Both take
several minutes to complete. This adds a lot of runtime for not much
more confidence.

Pick a set of 'interesting' (including powers-of-two, prime numbers,
edge-cases) lengths to run them over instead. Test time is reduced by
>150s on my desktop machine in debug mode.

Change-Id: I2962115734aff8dcaae0cc405274765105e31572
Reviewed-on: http://gerrit.cloudera.org:8080/7474
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1653419b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1653419b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1653419b

Branch: refs/heads/master
Commit: 1653419bd8b3748bbc0e3d5e7ffa1d412bc4b50f
Parents: ed73244
Author: Henry Robinson <he...@cloudera.com>
Authored: Thu Jul 20 10:52:34 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 04:11:39 2017 +0000

----------------------------------------------------------------------
 be/src/exprs/expr-test.cc | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1653419b/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 3b33c47..1e3366f 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3331,9 +3331,9 @@ TEST_F(ExprTest, StringFunctions) {
   big_str[ColumnType::MAX_VARCHAR_LENGTH] = '\0';
   sprintf(query, "cast('%sxxx' as VARCHAR(%d))", big_str, ColumnType::MAX_VARCHAR_LENGTH);
   TestStringValue(query, big_str);
+}
 
-  // base64{en,de}code
-
+TEST_F(ExprTest, StringBase64Coding) {
   // Test some known values of base64{en,de}code
   TestIsNull("base64encode(NULL)", TYPE_STRING);
   TestIsNull("base64decode(NULL)", TYPE_STRING);
@@ -3348,12 +3348,12 @@ TEST_F(ExprTest, StringFunctions) {
 
   // Test random short strings.
   srand(0);
-  for (int length = 1; length < 100; ++length) {
+  // Pick some 'interesting' (i.e. random, but include some powers of two, some primes,
+  // and edge-cases) lengths to test.
+  for (int length: {1, 2, 3, 5, 8, 32, 42, 50, 64, 71, 89, 99}) {
     for (int iteration = 0; iteration < 10; ++iteration) {
       string raw(length, ' ');
-      for (int j = 0; j < length; ++j) {
-        raw[j] = rand() % 128;
-      }
+      for (int j = 0; j < length; ++j) raw[j] = rand() % 128;
       const string as_octal = StringToOctalLiteral(raw);
       TestValue("length(base64encode('" + as_octal + "')) > length('" + as_octal + "')",
           TYPE_BOOLEAN, true);
@@ -3366,7 +3366,9 @@ TEST_F(ExprTest, StringFunctions) {
 TEST_F(ExprTest, LongReverse) {
   static const int MAX_LEN = 2048;
   string to_reverse(MAX_LEN, ' '), reversed(MAX_LEN, ' ');
-  for (int i = 0; i < MAX_LEN; ++i) {
+  // Pick some 'interesting' (i.e. random, but include some powers of two, some primes,
+  // and edge-cases) lengths to test.
+  for (int i: {1, 2, 3, 5, 8, 32, 42, 512, 1024, 1357, 1788, 2012, 2047}) {
     to_reverse[i] = reversed[MAX_LEN - 1 - i] = 'a' + (rand() % 26);
     TestStringValue("reverse('" + to_reverse.substr(0, i + 1) + "')",
         reversed.substr(MAX_LEN - 1 - i));


[3/4] incubator-impala git commit: IMPALA-5679: Fix Parquet count(*) with group by string

Posted by he...@apache.org.
IMPALA-5679: Fix Parquet count(*) with group by string

In a recent patch (IMPALA-5036) a bug was introduced where a count(*)
query that groups by a string partition column returned incorrect
results. Data was being written into the tuple at an incorrect offset.

Testing:
- Added an end-to-end test that selects from a table partitioned by a
  string column.

Change-Id: I225547574c2b2259ca81cb642d082e151f3bed6b
Reviewed-on: http://gerrit.cloudera.org:8080/7481
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/408b0aac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/408b0aac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/408b0aac

Branch: refs/heads/master
Commit: 408b0aac831ab7d6d6459353848f9a11b811e281
Parents: 1653419
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Fri Jul 21 14:13:28 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 05:53:06 2017 +0000

----------------------------------------------------------------------
 be/src/exec/hdfs-scan-node-base.h               |  2 +-
 .../queries/QueryTest/parquet-stats-agg.test    | 22 ++++++++++++++++++++
 tests/query_test/test_aggregation.py            |  6 +++---
 3 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/408b0aac/be/src/exec/hdfs-scan-node-base.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h
index f71f5b4..c79a6e8 100644
--- a/be/src/exec/hdfs-scan-node-base.h
+++ b/be/src/exec/hdfs-scan-node-base.h
@@ -155,7 +155,7 @@ class HdfsScanNodeBase : public ScanNode {
   int skip_header_line_count() const { return skip_header_line_count_; }
   DiskIoRequestContext* reader_context() { return reader_context_; }
   bool optimize_parquet_count_star() const { return optimize_parquet_count_star_; }
-  bool parquet_count_star_slot_offset() const { return parquet_count_star_slot_offset_; }
+  int parquet_count_star_slot_offset() const { return parquet_count_star_slot_offset_; }
 
   typedef std::unordered_map<TupleId, std::vector<ScalarExprEvaluator*>>
     ConjunctEvaluatorsMap;

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/408b0aac/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
index 3b1c33b..620c50b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
@@ -115,3 +115,25 @@ select count(*) from tpch_parquet.lineitem
 ---- TYPES
 bigint
 =====
+---- QUERY
+# IMPALA-5679: Count(*) with group by on a string partition column.
+drop table if exists $DATABASE.string_partitioned_table;
+create table $DATABASE.string_partitioned_table (int_col integer)
+partitioned by (string_col STRING) stored as parquet;
+insert into $DATABASE.string_partitioned_table partition(string_col)
+select int_col, string_col from functional.alltypes;
+select string_col, count(*) from $DATABASE.string_partitioned_table group by string_col;
+---- RESULTS
+'0',730
+'1',730
+'2',730
+'3',730
+'4',730
+'5',730
+'6',730
+'7',730
+'8',730
+'9',730
+---- TYPES
+string, bigint
+=====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/408b0aac/tests/query_test/test_aggregation.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_aggregation.py b/tests/query_test/test_aggregation.py
index 289a867..4999afe 100644
--- a/tests/query_test/test_aggregation.py
+++ b/tests/query_test/test_aggregation.py
@@ -271,14 +271,14 @@ class TestAggregationQueries(ImpalaTestSuite):
       # Verify codegen was enabled for all four stages of the aggregation.
       assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
 
-  def test_parquet_count_star_optimization(self, vector):
+  def test_parquet_count_star_optimization(self, vector, unique_database):
     if (vector.get_value('table_format').file_format != 'text' or
         vector.get_value('table_format').compression_codec != 'none'):
       # No need to run this test on all file formats
       pytest.skip()
-    self.run_test_case('QueryTest/parquet-stats-agg', vector)
+    self.run_test_case('QueryTest/parquet-stats-agg', vector, unique_database)
     vector.get_value('exec_option')['batch_size'] = 1
-    self.run_test_case('QueryTest/parquet-stats-agg', vector)
+    self.run_test_case('QueryTest/parquet-stats-agg', vector, unique_database)
 
 class TestWideAggregationQueries(ImpalaTestSuite):
   """Test that aggregations with many grouping columns work"""


[4/4] incubator-impala git commit: Remove spurious forward declarations and includes in coordinator.cc/h

Posted by he...@apache.org.
Remove spurious forward declarations and includes in coordinator.cc/h

While working on another patch, I noticed that a lot of includes and
forward declarations were spurious and possibly the result of bit rot.
This patch removes them and hopefully improves compile time a little.

Testing: Made sure that Impala and the BE tests compile successfully.

Change-Id: Id0afed224fad6a00698701487b51506d414f83ac
Reviewed-on: http://gerrit.cloudera.org:8080/7482
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/304edb28
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/304edb28
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/304edb28

Branch: refs/heads/master
Commit: 304edb28c7acff4b188de651d21f122c0b26f465
Parents: 408b0aa
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Fri Jul 21 15:31:27 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 08:10:27 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/coordinator.cc | 13 ++-----------
 be/src/runtime/coordinator.h  | 11 -----------
 2 files changed, 2 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/304edb28/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index f904c48..e2d9c4a 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -40,42 +40,33 @@
 #include "common/logging.h"
 #include "exec/data-sink.h"
 #include "exec/plan-root-sink.h"
-#include "exec/scan-node.h"
 #include "gen-cpp/Frontend_types.h"
 #include "gen-cpp/ImpalaInternalService.h"
 #include "gen-cpp/ImpalaInternalService_constants.h"
 #include "gen-cpp/ImpalaInternalService_types.h"
 #include "gen-cpp/Partitions_types.h"
 #include "gen-cpp/PlanNodes_types.h"
-#include "runtime/backend-client.h"
-#include "runtime/client-cache.h"
-#include "runtime/data-stream-mgr.h"
-#include "runtime/data-stream-sender.h"
 #include "runtime/exec-env.h"
 #include "runtime/fragment-instance-state.h"
 #include "runtime/hdfs-fs-cache.h"
 #include "runtime/mem-tracker.h"
-#include "runtime/parallel-executor.h"
 #include "runtime/query-exec-mgr.h"
-#include "runtime/row-batch.h"
-#include "runtime/tuple-row.h"
 #include "runtime/coordinator-filter-state.h"
 #include "runtime/coordinator-backend-state.h"
 #include "runtime/debug-options.h"
 #include "runtime/query-state.h"
 #include "scheduling/scheduler.h"
 #include "util/bloom-filter.h"
-#include "util/container-util.h"
 #include "util/counting-barrier.h"
 #include "util/debug-util.h"
 #include "util/error-util.h"
 #include "util/hdfs-bulk-ops.h"
 #include "util/hdfs-util.h"
+#include "util/histogram-metric.h"
 #include "util/network-util.h"
 #include "util/pretty-printer.h"
-#include "util/summary-util.h"
+#include "util/runtime-profile.h"
 #include "util/table-printer.h"
-#include "util/uid-util.h"
 
 #include "common/names.h"
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/304edb28/be/src/runtime/coordinator.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index e3106b0..0401fa0 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -40,29 +40,18 @@
 #include "gen-cpp/Types_types.h"
 #include "runtime/runtime-state.h" // for PartitionStatusMap; TODO: disentangle
 #include "scheduling/query-schedule.h"
-#include "util/histogram-metric.h"
 #include "util/progress-updater.h"
-#include "util/runtime-profile.h"
 
 namespace impala {
 
 class CountingBarrier;
-class DataStreamMgr;
-class DataSink;
-class RowBatch;
-class RowDescriptor;
 class ObjectPool;
 class RuntimeState;
-class ExecEnv;
 class TUpdateCatalogRequest;
-class TQueryExecRequest;
 class TReportExecStatusParams;
-class TRowBatch;
 class TPlanExecRequest;
 class TRuntimeProfileTree;
 class RuntimeProfile;
-class TablePrinter;
-class TPlanFragment;
 class QueryResultSet;
 class MemTracker;
 class PlanRootSink;