You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2018/07/17 22:11:03 UTC
kudu git commit: KUDU-2312: Scan predicate application ordering is
non-deterministic
Repository: kudu
Updated Branches:
refs/heads/branch-1.6.x f0d95c27e -> f43c77bed
KUDU-2312: Scan predicate application ordering is non-deterministic
This changes the scan predicate evaluation ordering so that it primarily
orders by selectivity (as before), but breaks ties by column index.
Change-Id: I99b2cabecd8626cad7e11fbdd492af7276e08348
Reviewed-on: http://gerrit.cloudera.org:8080/9440
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
Reviewed-on: http://gerrit.cloudera.org:8080/10945
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/f43c77be
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/f43c77be
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/f43c77be
Branch: refs/heads/branch-1.6.x
Commit: f43c77bed464556b66bf8f035dc50be541d6cdb4
Parents: f0d95c2
Author: Dan Burkert <da...@apache.org>
Authored: Tue Feb 20 11:17:48 2018 -0800
Committer: Dan Burkert <da...@apache.org>
Committed: Tue Jul 17 22:10:48 2018 +0000
----------------------------------------------------------------------
src/kudu/common/generic_iterators-test.cc | 82 +++++++++++++++++++++++++-
src/kudu/common/generic_iterators.cc | 34 +++++++----
src/kudu/common/generic_iterators.h | 14 +++--
src/kudu/tablet/cfile_set.h | 1 -
4 files changed, 111 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/f43c77be/src/kudu/common/generic_iterators-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/generic_iterators-test.cc b/src/kudu/common/generic_iterators-test.cc
index e7f9250..55eaa09 100644
--- a/src/kudu/common/generic_iterators-test.cc
+++ b/src/kudu/common/generic_iterators-test.cc
@@ -51,6 +51,7 @@ DEFINE_int32(num_lists, 3, "Number of lists to merge");
DEFINE_int32(num_rows, 1000, "Number of entries per list");
DEFINE_int32(num_iters, 1, "Number of times to run merge");
+using std::make_shared;
using std::shared_ptr;
using std::string;
using std::vector;
@@ -309,7 +310,7 @@ TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluation) {
ASSERT_EQ(0, spec.predicates().size())
<< "Iterator tree should have accepted predicate";
- ASSERT_EQ(1, pred_eval->col_idx_predicates_.size())
+ ASSERT_EQ(1, pred_eval->col_predicates_.size())
<< "Predicate should be evaluated by the outer iterator";
Arena arena(1024);
@@ -338,4 +339,83 @@ TEST(TestPredicateEvaluatingIterator, TestDontWrapWhenNoPredicates) {
ASSERT_EQ(outer_iter, materializing) << "InitAndMaybeWrap should not have wrapped iter";
}
+// Test row-wise iterator which does nothing.
+class DummyIterator : public RowwiseIterator {
+ public:
+
+ explicit DummyIterator(Schema schema)
+ : schema_(std::move(schema)) {
+ }
+
+ Status Init(ScanSpec* /*spec*/) override {
+ return Status::OK();
+ }
+
+ virtual Status NextBlock(RowBlock* /*dst*/) override {
+ LOG(FATAL) << "unimplemented!";
+ return Status::OK();
+ }
+
+ virtual bool HasNext() const override {
+ LOG(FATAL) << "unimplemented!";
+ return false;
+ }
+
+ virtual string ToString() const override {
+ return "DummyIterator";
+ }
+
+ virtual const Schema& schema() const override {
+ return schema_;
+ }
+
+ virtual void GetIteratorStats(vector<IteratorStats>* stats) const override {
+ stats->resize(schema().num_columns());
+ }
+
+ private:
+ Schema schema_;
+};
+
+TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluationOrder) {
+ Schema schema({ ColumnSchema("a_int64", INT64),
+ ColumnSchema("b_int64", INT64),
+ ColumnSchema("c_int32", INT32) }, 3);
+
+ int64_t zero = 0;
+ int64_t two = 2;
+ auto a_equality = ColumnPredicate::Equality(schema.column(0), &zero);
+ auto b_equality = ColumnPredicate::Equality(schema.column(1), &zero);
+ auto c_equality = ColumnPredicate::Equality(schema.column(2), &zero);
+ auto a_range = ColumnPredicate::Range(schema.column(0), &zero, &two);
+
+ { // Test that more selective predicates come before others.
+ ScanSpec spec;
+ spec.AddPredicate(a_range);
+ spec.AddPredicate(b_equality);
+ spec.AddPredicate(c_equality);
+
+ shared_ptr<RowwiseIterator> iter = make_shared<DummyIterator>(schema);
+ ASSERT_OK(PredicateEvaluatingIterator::InitAndMaybeWrap(&iter, &spec));
+
+ PredicateEvaluatingIterator* pred_eval = down_cast<PredicateEvaluatingIterator*>(iter.get());
+ ASSERT_TRUE(pred_eval->col_predicates_ ==
+ vector<ColumnPredicate>({ c_equality, b_equality, a_range }));
+ }
+
+ { // Test that smaller columns come before larger ones, and ties are broken by idx.
+ ScanSpec spec;
+ spec.AddPredicate(b_equality);
+ spec.AddPredicate(a_equality);
+ spec.AddPredicate(c_equality);
+
+ shared_ptr<RowwiseIterator> iter = make_shared<DummyIterator>(schema);
+ ASSERT_OK(PredicateEvaluatingIterator::InitAndMaybeWrap(&iter, &spec));
+
+ PredicateEvaluatingIterator* pred_eval = down_cast<PredicateEvaluatingIterator*>(iter.get());
+ ASSERT_TRUE(pred_eval->col_predicates_ ==
+ vector<ColumnPredicate>({ c_equality, a_equality, b_equality }));
+ }
+}
+
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/f43c77be/src/kudu/common/generic_iterators.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/generic_iterators.cc b/src/kudu/common/generic_iterators.cc
index 811682c..fac5acf 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -23,7 +23,6 @@
#include <memory>
#include <mutex>
#include <string>
-#include <tuple>
#include <unordered_map>
#include <utility>
@@ -44,14 +43,13 @@
#include "kudu/util/flag_tags.h"
#include "kudu/util/memory/arena.h"
-using std::all_of;
using std::get;
using std::move;
+using std::pair;
using std::remove_if;
using std::shared_ptr;
using std::sort;
using std::string;
-using std::tuple;
using std::unique_ptr;
using std::vector;
@@ -491,11 +489,13 @@ Status MaterializingIterator::Init(ScanSpec *spec) {
}
}
- // Sort the predicates by selectivity so that the most selective are evaluated earlier.
+ // Sort the predicates by selectivity so that the most selective are evaluated
+ // earlier, with ties broken by the column index.
sort(col_idx_predicates_.begin(), col_idx_predicates_.end(),
- [] (const tuple<int32_t, ColumnPredicate>& left,
- const tuple<int32_t, ColumnPredicate>& right) {
- return SelectivityComparator(get<1>(left), get<1>(right));
+ [] (const pair<int32_t, ColumnPredicate>& left,
+ const pair<int32_t, ColumnPredicate>& right) {
+ int comp = SelectivityComparator(left.second, right.second);
+ return comp ? comp < 0 : left.first < right.first;
});
return Status::OK();
@@ -588,6 +588,7 @@ PredicateEvaluatingIterator::PredicateEvaluatingIterator(shared_ptr<RowwiseItera
Status PredicateEvaluatingIterator::InitAndMaybeWrap(
shared_ptr<RowwiseIterator> *base_iter, ScanSpec *spec) {
RETURN_NOT_OK((*base_iter)->Init(spec));
+
if (spec != nullptr && !spec->predicates().empty()) {
// Underlying iterator did not accept all predicates. Wrap it.
shared_ptr<RowwiseIterator> wrapper(new PredicateEvaluatingIterator(*base_iter));
@@ -603,15 +604,22 @@ Status PredicateEvaluatingIterator::Init(ScanSpec *spec) {
// Gather any predicates that the base iterator did not pushdown, and remove
// the predicates from the spec.
- col_idx_predicates_.clear();
- col_idx_predicates_.reserve(spec->predicates().size());
+ col_predicates_.clear();
+ col_predicates_.reserve(spec->predicates().size());
for (auto& predicate : spec->predicates()) {
- col_idx_predicates_.emplace_back(predicate.second);
+ col_predicates_.emplace_back(predicate.second);
}
spec->RemovePredicates();
- // Sort the predicates by selectivity so that the most selective are evaluated earlier.
- sort(col_idx_predicates_.begin(), col_idx_predicates_.end(), SelectivityComparator);
+ // Sort the predicates by selectivity so that the most selective are evaluated
+ // earlier, with ties broken by the column index.
+ sort(col_predicates_.begin(), col_predicates_.end(),
+ [&] (const ColumnPredicate& left, const ColumnPredicate& right) {
+ int comp = SelectivityComparator(left, right);
+ if (comp != 0) return comp < 0;
+ return schema().find_column(left.column().name())
+ < schema().find_column(right.column().name());
+ });
return Status::OK();
}
@@ -623,7 +631,7 @@ bool PredicateEvaluatingIterator::HasNext() const {
Status PredicateEvaluatingIterator::NextBlock(RowBlock *dst) {
RETURN_NOT_OK(base_iter_->NextBlock(dst));
- for (const auto& predicate : col_idx_predicates_) {
+ for (const auto& predicate : col_predicates_) {
int32_t col_idx = dst->schema().find_column(predicate.column().name());
if (col_idx == Schema::kColumnNotFound) {
return Status::InvalidArgument("Unknown column in predicate", predicate.ToString());
http://git-wip-us.apache.org/repos/asf/kudu/blob/f43c77be/src/kudu/common/generic_iterators.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/generic_iterators.h b/src/kudu/common/generic_iterators.h
index d90562d..49ed9b5 100644
--- a/src/kudu/common/generic_iterators.h
+++ b/src/kudu/common/generic_iterators.h
@@ -23,7 +23,7 @@
#include <memory>
#include <ostream>
#include <string>
-#include <tuple>
+#include <utility>
#include <vector>
#include <glog/logging.h>
@@ -203,8 +203,9 @@ class MaterializingIterator : public RowwiseIterator {
std::shared_ptr<ColumnwiseIterator> iter_;
- // List of (column index, predicate) in order of most to least selective.
- std::vector<std::tuple<int32_t, ColumnPredicate>> col_idx_predicates_;
+ // List of (column index, predicate) in order of most to least selective, with
+ // ties broken by the index.
+ std::vector<std::pair<int32_t, ColumnPredicate>> col_idx_predicates_;
// List of column indexes without predicates to materialize.
std::vector<int32_t> non_predicate_column_indexes_;
@@ -248,17 +249,20 @@ class PredicateEvaluatingIterator : public RowwiseIterator {
}
private:
+
// Construct the evaluating iterator.
// This is only called from ::InitAndMaybeWrap()
// REQUIRES: base_iter is already Init()ed.
explicit PredicateEvaluatingIterator(std::shared_ptr<RowwiseIterator> base_iter);
FRIEND_TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluation);
+ FRIEND_TEST(TestPredicateEvaluatingIterator, TestPredicateEvaluationOrder);
std::shared_ptr<RowwiseIterator> base_iter_;
- // List of (column index, predicate) in order of most to least selective.
- std::vector<ColumnPredicate> col_idx_predicates_;
+ // List of predicates in order of most to least selective, with
+ // ties broken by the column index.
+ std::vector<ColumnPredicate> col_predicates_;
};
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/f43c77be/src/kudu/tablet/cfile_set.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/cfile_set.h b/src/kudu/tablet/cfile_set.h
index cfd28d2..e107c21 100644
--- a/src/kudu/tablet/cfile_set.h
+++ b/src/kudu/tablet/cfile_set.h
@@ -120,7 +120,6 @@ class CFileSet : public std::enable_shared_from_this<CFileSet> {
private:
friend class Iterator;
- friend class CFileSetIteratorProjector;
DISALLOW_COPY_AND_ASSIGN(CFileSet);