You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by he...@apache.org on 2017/07/22 21:09:44 UTC
[1/4] incubator-impala git commit: IMPALA-3937: Deprecate
--be_service_threads
Repository: incubator-impala
Updated Branches:
refs/heads/master ad0c6e749 -> 304edb28c
IMPALA-3937: Deprecate --be_service_threads
FLAGS_be_service_threads has no effect and can be removed. Backend
Thrift servers do not use a fixed-size thread pool; instead they use
one thread per connection.
Change-Id: I10e48014f24eebd22251bac4734bc3c90dee47c0
Reviewed-on: http://gerrit.cloudera.org:8080/7483
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ed732443
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ed732443
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ed732443
Branch: refs/heads/master
Commit: ed7324431d16a37a279d730a036197fc9019c3ce
Parents: ad0c6e7
Author: Henry Robinson <he...@cloudera.com>
Authored: Fri Jul 21 16:20:55 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 03:44:01 2017 +0000
----------------------------------------------------------------------
be/src/service/impala-server.cc | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ed732443/be/src/service/impala-server.cc
----------------------------------------------------------------------
diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc
index a26f0ab..c3d22fe 100644
--- a/be/src/service/impala-server.cc
+++ b/be/src/service/impala-server.cc
@@ -114,8 +114,8 @@ DEFINE_int32(hs2_port, 21050, "port on which HiveServer2 client requests are ser
DEFINE_int32(fe_service_threads, 64,
"number of threads available to serve client requests");
-DEFINE_int32(be_service_threads, 64,
- "(Advanced) number of threads available to serve backend execution requests");
+DEFINE_int32_hidden(be_service_threads, 64,
+ "Deprecated, no longer has any effect. Will be removed in Impala 3.0.");
DEFINE_string(default_query_options, "", "key=value pair of default query options for"
" impalad, separated by ','");
DEFINE_int32(query_log_size, 25, "Number of queries to retain in the query log. If -1, "
@@ -1934,7 +1934,7 @@ Status CreateImpalaServer(ExecEnv* exec_env, int beeswax_port, int hs2_port, int
be_processor->setEventHandler(event_handler);
*be_server = new ThriftServer("backend", be_processor, be_port, nullptr,
- exec_env->metrics(), FLAGS_be_service_threads);
+ exec_env->metrics());
if (EnableInternalSslConnections()) {
LOG(INFO) << "Enabling SSL for backend";
RETURN_IF_ERROR((*be_server)->EnableSsl(FLAGS_ssl_server_certificate,
[2/4] incubator-impala git commit: IMPALA-5688: Reduce run-time of a
couple of expr-test heavy-hitters
Posted by he...@apache.org.
IMPALA-5688: Reduce run-time of a couple of expr-test heavy-hitters
Two tests (LongReverse and the base64 tests in StringFunctions)
iterate over every length in a contiguous range (0..2047 and 1..99
respectively). Both take several minutes to complete. This adds a lot
of runtime for not much more confidence.
Pick a set of 'interesting' (including powers-of-two, prime numbers,
edge-cases) lengths to run them over instead. Test time is reduced by
>150s on my desktop machine in debug mode.
Change-Id: I2962115734aff8dcaae0cc405274765105e31572
Reviewed-on: http://gerrit.cloudera.org:8080/7474
Reviewed-by: Henry Robinson <he...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1653419b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1653419b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1653419b
Branch: refs/heads/master
Commit: 1653419bd8b3748bbc0e3d5e7ffa1d412bc4b50f
Parents: ed73244
Author: Henry Robinson <he...@cloudera.com>
Authored: Thu Jul 20 10:52:34 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 04:11:39 2017 +0000
----------------------------------------------------------------------
be/src/exprs/expr-test.cc | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1653419b/be/src/exprs/expr-test.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc
index 3b33c47..1e3366f 100644
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -3331,9 +3331,9 @@ TEST_F(ExprTest, StringFunctions) {
big_str[ColumnType::MAX_VARCHAR_LENGTH] = '\0';
sprintf(query, "cast('%sxxx' as VARCHAR(%d))", big_str, ColumnType::MAX_VARCHAR_LENGTH);
TestStringValue(query, big_str);
+}
- // base64{en,de}code
-
+TEST_F(ExprTest, StringBase64Coding) {
// Test some known values of base64{en,de}code
TestIsNull("base64encode(NULL)", TYPE_STRING);
TestIsNull("base64decode(NULL)", TYPE_STRING);
@@ -3348,12 +3348,12 @@ TEST_F(ExprTest, StringFunctions) {
// Test random short strings.
srand(0);
- for (int length = 1; length < 100; ++length) {
+ // Pick some 'interesting' (i.e. random, but include some powers of two, some primes,
+ // and edge-cases) lengths to test.
+ for (int length: {1, 2, 3, 5, 8, 32, 42, 50, 64, 71, 89, 99}) {
for (int iteration = 0; iteration < 10; ++iteration) {
string raw(length, ' ');
- for (int j = 0; j < length; ++j) {
- raw[j] = rand() % 128;
- }
+ for (int j = 0; j < length; ++j) raw[j] = rand() % 128;
const string as_octal = StringToOctalLiteral(raw);
TestValue("length(base64encode('" + as_octal + "')) > length('" + as_octal + "')",
TYPE_BOOLEAN, true);
@@ -3366,7 +3366,9 @@ TEST_F(ExprTest, StringFunctions) {
TEST_F(ExprTest, LongReverse) {
static const int MAX_LEN = 2048;
string to_reverse(MAX_LEN, ' '), reversed(MAX_LEN, ' ');
- for (int i = 0; i < MAX_LEN; ++i) {
+ // Pick some 'interesting' (i.e. random, but include some powers of two, some primes,
+ // and edge-cases) lengths to test.
+ for (int i: {1, 2, 3, 5, 8, 32, 42, 512, 1024, 1357, 1788, 2012, 2047}) {
to_reverse[i] = reversed[MAX_LEN - 1 - i] = 'a' + (rand() % 26);
TestStringValue("reverse('" + to_reverse.substr(0, i + 1) + "')",
reversed.substr(MAX_LEN - 1 - i));
[3/4] incubator-impala git commit: IMPALA-5679: Fix Parquet count(*)
with group by string
Posted by he...@apache.org.
IMPALA-5679: Fix Parquet count(*) with group by string
A recent patch (IMPALA-5036) introduced a bug where a count(*) query
that groups by a string partition column returned incorrect results.
Data was being written into the tuple at an incorrect offset.
Testing:
- Added an end-to-end test that selects from a table partitioned by a
string column.
Change-Id: I225547574c2b2259ca81cb642d082e151f3bed6b
Reviewed-on: http://gerrit.cloudera.org:8080/7481
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/408b0aac
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/408b0aac
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/408b0aac
Branch: refs/heads/master
Commit: 408b0aac831ab7d6d6459353848f9a11b811e281
Parents: 1653419
Author: Taras Bobrovytsky <tb...@cloudera.com>
Authored: Fri Jul 21 14:13:28 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 05:53:06 2017 +0000
----------------------------------------------------------------------
be/src/exec/hdfs-scan-node-base.h | 2 +-
.../queries/QueryTest/parquet-stats-agg.test | 22 ++++++++++++++++++++
tests/query_test/test_aggregation.py | 6 +++---
3 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/408b0aac/be/src/exec/hdfs-scan-node-base.h
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node-base.h b/be/src/exec/hdfs-scan-node-base.h
index f71f5b4..c79a6e8 100644
--- a/be/src/exec/hdfs-scan-node-base.h
+++ b/be/src/exec/hdfs-scan-node-base.h
@@ -155,7 +155,7 @@ class HdfsScanNodeBase : public ScanNode {
int skip_header_line_count() const { return skip_header_line_count_; }
DiskIoRequestContext* reader_context() { return reader_context_; }
bool optimize_parquet_count_star() const { return optimize_parquet_count_star_; }
- bool parquet_count_star_slot_offset() const { return parquet_count_star_slot_offset_; }
+ int parquet_count_star_slot_offset() const { return parquet_count_star_slot_offset_; }
typedef std::unordered_map<TupleId, std::vector<ScalarExprEvaluator*>>
ConjunctEvaluatorsMap;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/408b0aac/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
index 3b1c33b..620c50b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-stats-agg.test
@@ -115,3 +115,25 @@ select count(*) from tpch_parquet.lineitem
---- TYPES
bigint
=====
+---- QUERY
+# IMPALA-5679: Count(*) with group by on a string partition column.
+drop table if exists $DATABASE.string_partitioned_table;
+create table $DATABASE.string_partitioned_table (int_col integer)
+partitioned by (string_col STRING) stored as parquet;
+insert into $DATABASE.string_partitioned_table partition(string_col)
+select int_col, string_col from functional.alltypes;
+select string_col, count(*) from $DATABASE.string_partitioned_table group by string_col;
+---- RESULTS
+'0',730
+'1',730
+'2',730
+'3',730
+'4',730
+'5',730
+'6',730
+'7',730
+'8',730
+'9',730
+---- TYPES
+string, bigint
+=====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/408b0aac/tests/query_test/test_aggregation.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_aggregation.py b/tests/query_test/test_aggregation.py
index 289a867..4999afe 100644
--- a/tests/query_test/test_aggregation.py
+++ b/tests/query_test/test_aggregation.py
@@ -271,14 +271,14 @@ class TestAggregationQueries(ImpalaTestSuite):
# Verify codegen was enabled for all four stages of the aggregation.
assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
- def test_parquet_count_star_optimization(self, vector):
+ def test_parquet_count_star_optimization(self, vector, unique_database):
if (vector.get_value('table_format').file_format != 'text' or
vector.get_value('table_format').compression_codec != 'none'):
# No need to run this test on all file formats
pytest.skip()
- self.run_test_case('QueryTest/parquet-stats-agg', vector)
+ self.run_test_case('QueryTest/parquet-stats-agg', vector, unique_database)
vector.get_value('exec_option')['batch_size'] = 1
- self.run_test_case('QueryTest/parquet-stats-agg', vector)
+ self.run_test_case('QueryTest/parquet-stats-agg', vector, unique_database)
class TestWideAggregationQueries(ImpalaTestSuite):
"""Test that aggregations with many grouping columns work"""
[4/4] incubator-impala git commit: Remove spurious forward
declarations and includes in coordinator.cc/h
Posted by he...@apache.org.
Remove spurious forward declarations and includes in coordinator.cc/h
While working on another patch, I noticed that a lot of includes and
forward declarations were spurious and possibly the result of bit rot.
This patch removes them and hopefully improves compile time a little.
Testing: Made sure that Impala and the BE tests compile successfully.
Change-Id: Id0afed224fad6a00698701487b51506d414f83ac
Reviewed-on: http://gerrit.cloudera.org:8080/7482
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/304edb28
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/304edb28
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/304edb28
Branch: refs/heads/master
Commit: 304edb28c7acff4b188de651d21f122c0b26f465
Parents: 408b0aa
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Fri Jul 21 15:31:27 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Sat Jul 22 08:10:27 2017 +0000
----------------------------------------------------------------------
be/src/runtime/coordinator.cc | 13 ++-----------
be/src/runtime/coordinator.h | 11 -----------
2 files changed, 2 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/304edb28/be/src/runtime/coordinator.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.cc b/be/src/runtime/coordinator.cc
index f904c48..e2d9c4a 100644
--- a/be/src/runtime/coordinator.cc
+++ b/be/src/runtime/coordinator.cc
@@ -40,42 +40,33 @@
#include "common/logging.h"
#include "exec/data-sink.h"
#include "exec/plan-root-sink.h"
-#include "exec/scan-node.h"
#include "gen-cpp/Frontend_types.h"
#include "gen-cpp/ImpalaInternalService.h"
#include "gen-cpp/ImpalaInternalService_constants.h"
#include "gen-cpp/ImpalaInternalService_types.h"
#include "gen-cpp/Partitions_types.h"
#include "gen-cpp/PlanNodes_types.h"
-#include "runtime/backend-client.h"
-#include "runtime/client-cache.h"
-#include "runtime/data-stream-mgr.h"
-#include "runtime/data-stream-sender.h"
#include "runtime/exec-env.h"
#include "runtime/fragment-instance-state.h"
#include "runtime/hdfs-fs-cache.h"
#include "runtime/mem-tracker.h"
-#include "runtime/parallel-executor.h"
#include "runtime/query-exec-mgr.h"
-#include "runtime/row-batch.h"
-#include "runtime/tuple-row.h"
#include "runtime/coordinator-filter-state.h"
#include "runtime/coordinator-backend-state.h"
#include "runtime/debug-options.h"
#include "runtime/query-state.h"
#include "scheduling/scheduler.h"
#include "util/bloom-filter.h"
-#include "util/container-util.h"
#include "util/counting-barrier.h"
#include "util/debug-util.h"
#include "util/error-util.h"
#include "util/hdfs-bulk-ops.h"
#include "util/hdfs-util.h"
+#include "util/histogram-metric.h"
#include "util/network-util.h"
#include "util/pretty-printer.h"
-#include "util/summary-util.h"
+#include "util/runtime-profile.h"
#include "util/table-printer.h"
-#include "util/uid-util.h"
#include "common/names.h"
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/304edb28/be/src/runtime/coordinator.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/coordinator.h b/be/src/runtime/coordinator.h
index e3106b0..0401fa0 100644
--- a/be/src/runtime/coordinator.h
+++ b/be/src/runtime/coordinator.h
@@ -40,29 +40,18 @@
#include "gen-cpp/Types_types.h"
#include "runtime/runtime-state.h" // for PartitionStatusMap; TODO: disentangle
#include "scheduling/query-schedule.h"
-#include "util/histogram-metric.h"
#include "util/progress-updater.h"
-#include "util/runtime-profile.h"
namespace impala {
class CountingBarrier;
-class DataStreamMgr;
-class DataSink;
-class RowBatch;
-class RowDescriptor;
class ObjectPool;
class RuntimeState;
-class ExecEnv;
class TUpdateCatalogRequest;
-class TQueryExecRequest;
class TReportExecStatusParams;
-class TRowBatch;
class TPlanExecRequest;
class TRuntimeProfileTree;
class RuntimeProfile;
-class TablePrinter;
-class TPlanFragment;
class QueryResultSet;
class MemTracker;
class PlanRootSink;