You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/05/13 06:09:35 UTC
[04/10] incubator-impala git commit: Remove replica_preference query
option
Remove replica_preference query option
Change-Id: I5a3134b874a53241706d850d186acbfed768f5ee
Reviewed-on: http://gerrit.cloudera.org:8080/2323
Reviewed-by: Marcel Kornacker <ma...@cloudera.com>
Reviewed-by: Silvius Rus <sr...@cloudera.com>
Tested-by: Internal Jenkins
Reviewed-on: http://gerrit.cloudera.org:8080/3030
Reviewed-by: Lars Volker <lv...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/cb377741
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/cb377741
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/cb377741
Branch: refs/heads/master
Commit: cb377741eca8b5e5aa218e39518136676e535561
Parents: 9174dee
Author: Lars Volker <lv...@cloudera.com>
Authored: Thu Feb 25 22:07:32 2016 -0800
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Thu May 12 23:06:36 2016 -0700
----------------------------------------------------------------------
be/src/scheduling/simple-scheduler-test.cc | 6 +----
be/src/scheduling/simple-scheduler.cc | 7 ++---
be/src/service/query-options.cc | 28 ++------------------
be/src/service/query-options.h | 3 +--
common/thrift/ImpalaInternalService.thrift | 7 +----
common/thrift/ImpalaService.thrift | 7 +----
.../com/cloudera/impala/analysis/TableRef.java | 11 +-------
.../impala/analysis/AnalyzeStmtsTest.java | 26 +++++-------------
.../cloudera/impala/analysis/ParserTest.java | 21 ++++-----------
.../com/cloudera/impala/analysis/ToSqlTest.java | 17 +++++-------
10 files changed, 30 insertions(+), 103 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/be/src/scheduling/simple-scheduler-test.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler-test.cc b/be/src/scheduling/simple-scheduler-test.cc
index d7786bd..bebb1fb 100644
--- a/be/src/scheduling/simple-scheduler-test.cc
+++ b/be/src/scheduling/simple-scheduler-test.cc
@@ -365,11 +365,7 @@ class Plan {
const TQueryOptions& query_options() const { return query_options_; }
- void SetReplicaPreference(TReplicaPreference::type p) {
- query_options_.replica_preference = p;
- }
-
- void SetRandomReplica(bool b) { query_options_.random_replica = b; }
+ void SetRandomReplica(bool b) { query_options_.schedule_random_replica = b; }
void SetDisableCachedReads(bool b) { query_options_.disable_cached_reads = b; }
const Cluster& cluster() const { return schema_.cluster(); }
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/be/src/scheduling/simple-scheduler.cc
----------------------------------------------------------------------
diff --git a/be/src/scheduling/simple-scheduler.cc b/be/src/scheduling/simple-scheduler.cc
index 265ca10..5074071 100644
--- a/be/src/scheduling/simple-scheduler.cc
+++ b/be/src/scheduling/simple-scheduler.cc
@@ -451,8 +451,8 @@ Status SimpleScheduler::ComputeScanRangeAssignment(
const TQueryOptions& query_options, FragmentScanRangeAssignment* assignment) {
// We adjust all replicas with memory distance less than base_distance to base_distance
// and view all replicas with equal or better distance as the same. For a full list of
- // memory distance classes see TReplicaPreference in ImpalaInternalService.thrift.
- TReplicaPreference::type base_distance = query_options.replica_preference;
+ // memory distance classes see TReplicaPreference in PlanNodes.thrift.
+ TReplicaPreference::type base_distance = TReplicaPreference::CACHE_LOCAL;
// The query option to disable cached reads adjusts the memory base distance to view
// all replicas as disk_local or worse.
// TODO remove in CDH6
@@ -467,7 +467,8 @@ Status SimpleScheduler::ComputeScanRangeAssignment(
// On otherwise equivalent disk replicas we either pick the first one, or we pick a
// random one. Picking random ones helps with preventing hot spots across several
// queries. On cached replica we will always break ties randomly.
- bool random_non_cached_tiebreak = node_random_replica || query_options.random_replica;
+ bool random_non_cached_tiebreak = node_random_replica
+ || query_options.schedule_random_replica;
// map from datanode host to total assigned bytes.
unordered_map<TNetworkAddress, uint64_t> assigned_bytes_per_host;
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/be/src/service/query-options.cc
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc
index 4617256..274776c 100644
--- a/be/src/service/query-options.cc
+++ b/be/src/service/query-options.cc
@@ -239,14 +239,6 @@ Status impala::SetQueryOption(const string& key, const string& value,
break;
case TImpalaQueryOptions::DISABLE_CACHED_READS:
if (iequals(value, "true") || iequals(value, "1")) {
- if (query_options->replica_preference == TReplicaPreference::CACHE_LOCAL ||
- query_options->replica_preference == TReplicaPreference::CACHE_RACK) {
- stringstream ss;
- ss << "Conflicting settings: DISABLE_CACHED_READS = true and"
- << " REPLICA_PREFERENCE = " << _TReplicaPreference_VALUES_TO_NAMES.at(
- query_options->replica_preference);
- return Status(ss.str());
- }
query_options->__set_disable_cached_reads(true);
}
break;
@@ -286,24 +278,8 @@ Status impala::SetQueryOption(const string& key, const string& value,
query_options->__set_optimize_partition_key_scans(
iequals(value, "true") || iequals(value, "1"));
break;
- case TImpalaQueryOptions::REPLICA_PREFERENCE:
- if (iequals(value, "cache_local") || iequals(value, "0")) {
- if (query_options->disable_cached_reads) {
- return Status("Conflicting settings: DISABLE_CACHED_READS = true and"
- " REPLICA_PREFERENCE = CACHE_LOCAL");
- }
- query_options->__set_replica_preference(TReplicaPreference::CACHE_LOCAL);
- } else if (iequals(value, "disk_local") || iequals(value, "2")) {
- query_options->__set_replica_preference(TReplicaPreference::DISK_LOCAL);
- } else if (iequals(value, "remote") || iequals(value, "4")) {
- query_options->__set_replica_preference(TReplicaPreference::REMOTE);
- } else {
- return Status(Substitute("Invalid replica memory distance preference '$0'."
- "Valid values are CACHE_LOCAL(0), DISK_LOCAL(2), REMOTE(4)", value));
- }
- break;
- case TImpalaQueryOptions::RANDOM_REPLICA:
- query_options->__set_random_replica(
+ case TImpalaQueryOptions::SCHEDULE_RANDOM_REPLICA:
+ query_options->__set_schedule_random_replica(
iequals(value, "true") || iequals(value, "1"));
break;
case TImpalaQueryOptions::SCAN_NODE_CODEGEN_THRESHOLD:
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/be/src/service/query-options.h
----------------------------------------------------------------------
diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h
index 6f4c214..fb24530 100644
--- a/be/src/service/query-options.h
+++ b/be/src/service/query-options.h
@@ -65,8 +65,7 @@ class TQueryOptions;
QUERY_OPT_FN(seq_compression_mode, SEQ_COMPRESSION_MODE)\
QUERY_OPT_FN(exec_single_node_rows_threshold, EXEC_SINGLE_NODE_ROWS_THRESHOLD)\
QUERY_OPT_FN(optimize_partition_key_scans, OPTIMIZE_PARTITION_KEY_SCANS)\
- QUERY_OPT_FN(replica_preference, REPLICA_PREFERENCE)\
- QUERY_OPT_FN(random_replica, RANDOM_REPLICA)\
+ QUERY_OPT_FN(schedule_random_replica, SCHEDULE_RANDOM_REPLICA)\
QUERY_OPT_FN(scan_node_codegen_threshold, SCAN_NODE_CODEGEN_THRESHOLD)\
QUERY_OPT_FN(disable_streaming_preaggregations, DISABLE_STREAMING_PREAGGREGATIONS)\
QUERY_OPT_FN(runtime_filter_mode, RUNTIME_FILTER_MODE)\
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index b9892e6..74bef28 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -133,17 +133,12 @@ struct TQueryOptions {
// produce different results than the scan based approach in some edge cases.
32: optional bool optimize_partition_key_scans = 0
- // Specify the prefered locality level of replicas during scan scheduling.
- // Replicas with an equal or better locality will be preferred.
- 33: optional PlanNodes.TReplicaPreference replica_preference =
- PlanNodes.TReplicaPreference.CACHE_LOCAL
-
// Configure whether scheduling of scans over multiple non-cached replicas will break
// ties between multiple, otherwise equivalent locations at random or deterministically.
// The former will pick a random replica, the latter will use the replica order from the
// metastore. This setting will not affect tie-breaking for cached replicas. Instead,
// they will always break ties randomly.
- 34: optional bool random_replica = 0
+ 34: optional bool schedule_random_replica = 0
// For scan nodes with any conjuncts, use codegen to evaluate the conjuncts if
// the number of rows * number of operators in the conjuncts exceeds this threshold.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 68647df..9421ec4 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -172,13 +172,8 @@ enum TImpalaQueryOptions {
// produce different results than the scan based approach in some edge cases.
OPTIMIZE_PARTITION_KEY_SCANS,
- // Prefered memory distance of replicas. This parameter determines the pool of replicas
- // among which scans will be scheduled in terms of the distance of the replica storage
- // from the impalad.
- REPLICA_PREFERENCE,
-
// Determines tie breaking policy when picking locations.
- RANDOM_REPLICA,
+ SCHEDULE_RANDOM_REPLICA,
// For scan nodes with any conjuncts, use codegen to evaluate the conjuncts if
// the number of rows * number of operators in the conjuncts exceeds this threshold.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
index e53f1b1..6f9ac85 100644
--- a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
+++ b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java
@@ -394,16 +394,7 @@ public class TableRef implements ParseNode {
analyzer.addWarning("Table hints only supported for Hdfs tables");
}
for (String hint: tableHints_) {
- if (hint.equalsIgnoreCase("SCHEDULE_CACHE_LOCAL")) {
- analyzer.setHasPlanHints();
- replicaPreference_ = TReplicaPreference.CACHE_LOCAL;
- } else if (hint.equalsIgnoreCase("SCHEDULE_DISK_LOCAL")) {
- analyzer.setHasPlanHints();
- replicaPreference_ = TReplicaPreference.DISK_LOCAL;
- } else if (hint.equalsIgnoreCase("SCHEDULE_REMOTE")) {
- analyzer.setHasPlanHints();
- replicaPreference_ = TReplicaPreference.REMOTE;
- } else if (hint.equalsIgnoreCase("RANDOM_REPLICA")) {
+ if (hint.equalsIgnoreCase("SCHEDULE_RANDOM_REPLICA")) {
analyzer.setHasPlanHints();
randomReplica_ = true;
} else {
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
index 08d86a6..72c58d5 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java
@@ -1611,20 +1611,8 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
String suffix = hintStyle[1];
for (String alias : new String[] { "", "a" }) {
AnalyzesOk(
- String.format("select * from functional.alltypes %s %sschedule_cache_local%s",
- alias, prefix, suffix));
- AnalyzesOk(
- String.format("select * from functional.alltypes %s %sschedule_disk_local%s",
- alias, prefix, suffix));
- AnalyzesOk(
- String.format("select * from functional.alltypes %s %sschedule_remote%s",
- alias, prefix, suffix));
- AnalyzesOk(
- String.format("select * from functional.alltypes %s %sschedule_remote," +
- "random_replica%s", alias, prefix, suffix));
- AnalyzesOk(
- String.format("select * from functional.alltypes %s %srandom_replica," +
- "schedule_remote%s", alias, prefix, suffix));
+ String.format("select * from functional.alltypes %s " +
+ "%sschedule_random_replica%s", alias, prefix, suffix));
String name = alias.isEmpty() ? "functional.alltypes" : alias;
AnalyzesOk(String.format("select * from functional.alltypes %s %sFOO%s", alias,
@@ -1633,24 +1621,24 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
// Table hints not supported for HBase tables
AnalyzesOk(String.format("select * from functional_hbase.alltypes %s " +
- "%srandom_replica%s", alias, prefix, suffix),
+ "%sschedule_random_replica%s", alias, prefix, suffix),
"Table hints only supported for Hdfs tables");
// Table hints not supported for catalog views
AnalyzesOk(String.format("select * from functional.alltypes_view %s " +
- "%srandom_replica%s", alias, prefix, suffix),
+ "%sschedule_random_replica%s", alias, prefix, suffix),
"Table hints not supported for inline view and collections");
// Table hints not supported for with clauses
AnalyzesOk(String.format("with t as (select 1) select * from t %s " +
- "%srandom_replica%s", alias, prefix, suffix),
+ "%sschedule_random_replica%s", alias, prefix, suffix),
"Table hints not supported for inline view and collections");
}
// Table hints not supported for inline views
AnalyzesOk(String.format("select * from (select tinyint_col * 2 as c1 from " +
- "functional.alltypes) as v1 %srandom_replica%s", prefix, suffix),
+ "functional.alltypes) as v1 %sschedule_random_replica%s", prefix, suffix),
"Table hints not supported for inline view and collections");
// Table hints not supported for collection tables
AnalyzesOk(String.format("select item from functional.allcomplextypes, " +
- "allcomplextypes.int_array_col %srandom_replica%s", prefix, suffix),
+ "allcomplextypes.int_array_col %sschedule_random_replica%s", prefix, suffix),
"Table hints not supported for inline view and collections");
}
}
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java
index b8bf54f..f503d69 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java
@@ -392,25 +392,14 @@ public class ParserTest {
// Test TableRef hints.
TestTableHints(String.format(
- "select * from functional.alltypes %sschedule_disk_local%s", prefix, suffix),
- "schedule_disk_local");
- TestTableHints(String.format(
- "select * from functional.alltypes %sschedule_cache_local,random_replica%s",
- prefix, suffix), "schedule_cache_local", "random_replica");
- TestTableHints(String.format(
- "select * from functional.alltypes a %sschedule_cache_local,random_replica%s",
- prefix, suffix), "schedule_cache_local", "random_replica");
- TestTableHints(String.format(
- "select * from functional.alltypes a %sschedule_cache_local,random_replica%s" +
- ", functional.alltypes b %sschedule_remote%s", prefix, suffix,
- prefix, suffix), "schedule_cache_local", "random_replica", "schedule_remote");
+ "select * from functional.alltypes %sschedule_random_replica%s", prefix,
+ suffix), "schedule_random_replica");
// Test both TableRef and join hints.
TestTableAndJoinHints(String.format(
- "select * from functional.alltypes a %sschedule_cache_local,random_replica%s" +
- "join %sbroadcast%s functional.alltypes b %sschedule_remote%s using(id)",
- prefix, suffix, prefix, suffix, prefix, suffix), "schedule_cache_local",
- "random_replica", "broadcast", "schedule_remote");
+ "select * from functional.alltypes a %sschedule_random_replica%s join " +
+ "%sbroadcast%s functional.alltypes b using(id)", prefix, suffix, prefix, suffix,
+ prefix, suffix), "schedule_random_replica", "broadcast");
// Test select-list hints (e.g., straight_join). The legacy-style hint has no
// prefix and suffix.
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/cb377741/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java
index 521b19f..6005d35 100644
--- a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java
+++ b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java
@@ -468,20 +468,17 @@ public class ToSqlTest extends AnalyzerTest {
// Table hint
testToSql(String.format(
- "select * from functional.alltypes at %srandom_replica%s", prefix, suffix),
- "SELECT * FROM functional.alltypes at \n-- +random_replica\n");
+ "select * from functional.alltypes at %sschedule_random_replica%s", prefix,
+ suffix), "SELECT * FROM functional.alltypes at \n-- +schedule_random_replica\n"
+ );
testToSql(String.format(
- "select * from functional.alltypes %srandom_replica%s", prefix, suffix),
- "SELECT * FROM functional.alltypes \n-- +random_replica\n");
- testToSql(String.format(
- "select * from functional.alltypes %srandom_replica,schedule_disk_local%s",
- prefix, suffix), "SELECT * FROM functional.alltypes \n-- +random_replica," +
- "schedule_disk_local\n");
+ "select * from functional.alltypes %sschedule_random_replica%s", prefix,
+ suffix), "SELECT * FROM functional.alltypes \n-- +schedule_random_replica\n");
testToSql(String.format(
"select c1 from (select at.tinyint_col as c1 from functional.alltypes at " +
- "%srandom_replica%s) s1", prefix, suffix),
+ "%sschedule_random_replica%s) s1", prefix, suffix),
"SELECT c1 FROM (SELECT at.tinyint_col c1 FROM functional.alltypes at \n-- +" +
- "random_replica\n) s1");
+ "schedule_random_replica\n) s1");
// Select-list hint. The legacy-style hint has no prefix and suffix.
if (prefix.contains("[")) {