You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2017/01/06 22:25:54 UTC
kudu git commit: Tightening ScanSpec primary bounds when range
predicate exists
Repository: kudu
Updated Branches:
refs/heads/master e126bde8d -> 443b724e8
Tightening ScanSpec primary bounds when range predicate exists
Currently, push upper bound key predicates will break when meeting
a range predicate.
I think the upper bound key can be decreased.
For example, If PK = (a, b) and a < 2 && b < 3, PK can be further
tightning to PK < (1, 3).
Change-Id: I931a617605434700352d285fc2039033cf9eb07e
Reviewed-on: http://gerrit.cloudera.org:8080/5360
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <da...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/443b724e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/443b724e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/443b724e
Branch: refs/heads/master
Commit: 443b724e8858bc88747cbf33ec34afd5dbd3be2a
Parents: e126bde
Author: honghaijei <ho...@gmail.com>
Authored: Mon Dec 5 12:50:21 2016 +0000
Committer: Dan Burkert <da...@apache.org>
Committed: Fri Jan 6 22:25:39 2017 +0000
----------------------------------------------------------------------
src/kudu/common/key_util-test.cc | 57 +++++++++++++++
src/kudu/common/key_util.cc | 122 ++++++++++++++++++++++++++++-----
src/kudu/common/key_util.h | 3 +
src/kudu/common/scan_spec-test.cc | 61 +++++++++++++++--
4 files changed, 221 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/key_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util-test.cc b/src/kudu/common/key_util-test.cc
index 70dc181..b6c71f3 100644
--- a/src/kudu/common/key_util-test.cc
+++ b/src/kudu/common/key_util-test.cc
@@ -130,4 +130,61 @@ TEST_F(KeyUtilTest, TestIncrementCompositeStringIntPrimaryKey) {
EXPECT_EQ(R"(string k1="hello\000", int32 k2=-2147483648)", p_row.ToString());
}
+TEST_F(KeyUtilTest, TestTryDecrementCell) {
+ {
+ ColumnSchema col_uint32("a", UINT32);
+ uint32_t orig = 12;
+ EXPECT_EQ(key_util::TryDecrementCell(col_uint32, &orig), true);
+ EXPECT_EQ(orig, 11);
+ }
+ {
+ ColumnSchema col_uint32("a", UINT32);
+ uint32_t orig = 0;
+ EXPECT_EQ(key_util::TryDecrementCell(col_uint32, &orig), false);
+ EXPECT_EQ(orig, 0);
+ }
+ {
+ ColumnSchema col_int32("a", INT32);
+ int32_t orig = 0;
+ EXPECT_EQ(key_util::TryDecrementCell(col_int32, &orig), true);
+ EXPECT_EQ(orig, -1);
+ }
+ {
+ ColumnSchema col_int32("a", INT32);
+ int32_t orig = numeric_limits<int32_t>::min();
+ EXPECT_EQ(key_util::TryDecrementCell(col_int32, &orig), false);
+ EXPECT_EQ(orig, numeric_limits<int32_t>::min());
+ }
+ {
+ ColumnSchema col_bool("a", BOOL);
+ bool orig = true;
+ EXPECT_EQ(key_util::TryDecrementCell(col_bool, &orig), true);
+ EXPECT_EQ(orig, false);
+ }
+ {
+ ColumnSchema col_bool("a", BOOL);
+ int32_t orig = false;
+ EXPECT_EQ(key_util::TryDecrementCell(col_bool, &orig), false);
+ EXPECT_EQ(orig, false);
+ }
+ {
+ ColumnSchema col_str("a", STRING);
+ Slice orig("abc\0", 4);
+ EXPECT_EQ(key_util::TryDecrementCell(col_str, &orig), true);
+ EXPECT_EQ(orig, Slice("abc"));
+ }
+ {
+ ColumnSchema col_str("a", STRING);
+ Slice orig("abc");
+ EXPECT_EQ(key_util::TryDecrementCell(col_str, &orig), false);
+ EXPECT_EQ(orig, Slice("abc"));
+ }
+ {
+ ColumnSchema col_str("a", STRING);
+ Slice orig("");
+ EXPECT_EQ(key_util::TryDecrementCell(col_str, &orig), false);
+ EXPECT_EQ(orig, Slice(""));
+ }
+}
+
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/key_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util.cc b/src/kudu/common/key_util.cc
index 8ee6eb3..451cadd 100644
--- a/src/kudu/common/key_util.cc
+++ b/src/kudu/common/key_util.cc
@@ -55,6 +55,17 @@ bool IncrementBoolCell(void* cell_ptr) {
}
}
+bool DecrementBoolCell(void* cell_ptr) {
+ bool orig;
+ memcpy(&orig, cell_ptr, sizeof(bool));
+ if (orig) {
+ bool dec = false;
+ memcpy(cell_ptr, &dec, sizeof(bool));
+ return true;
+ }
+ return false;
+}
+
template<DataType type>
bool IncrementIntCell(void* cell_ptr) {
typedef DataTypeTraits<type> traits;
@@ -80,6 +91,33 @@ bool IncrementIntCell(void* cell_ptr) {
}
template<DataType type>
+bool DecrementIntCell(void* cell_ptr) {
+ typedef DataTypeTraits<type> traits;
+ typedef typename traits::cpp_type cpp_type;
+
+ cpp_type orig;
+ memcpy(&orig, cell_ptr, sizeof(cpp_type));
+
+ cpp_type dec;
+ if (std::is_unsigned<cpp_type>::value) {
+ dec = orig - 1;
+ } else {
+ // Signed overflow is undefined in C. So, we'll use a branch here
+ // instead of counting on undefined behavior.
+ if (orig == MathLimits<cpp_type>::kMin) {
+ dec = MathLimits<cpp_type>::kMax;
+ } else {
+ dec = orig - 1;
+ }
+ }
+ if (dec < orig) {
+ memcpy(cell_ptr, &dec, sizeof(cpp_type));
+ return true;
+ }
+ return false;
+}
+
+template<DataType type>
bool IncrementFloatingPointCell(void* cell_ptr) {
typedef DataTypeTraits<type> traits;
typedef typename traits::cpp_type cpp_type;
@@ -91,6 +129,20 @@ bool IncrementFloatingPointCell(void* cell_ptr) {
return inc != orig;
}
+template<DataType type>
+bool DecrementFloatingPointCell(void* cell_ptr) {
+ typedef DataTypeTraits<type> traits;
+ typedef typename traits::cpp_type cpp_type;
+
+ cpp_type orig;
+ memcpy(&orig, cell_ptr, sizeof(cpp_type));
+ cpp_type dec = nextafter(orig, -numeric_limits<cpp_type>::infinity());
+ if (dec != orig) {
+ memcpy(cell_ptr, &dec, sizeof(cpp_type));
+ }
+ return false;
+}
+
bool IncrementStringCell(void* cell_ptr, Arena* arena) {
Slice orig;
memcpy(&orig, cell_ptr, sizeof(orig));
@@ -104,6 +156,17 @@ bool IncrementStringCell(void* cell_ptr, Arena* arena) {
return true;
}
+bool DecrementStringCell(void* cell_ptr) {
+ Slice orig;
+ memcpy(&orig, cell_ptr, sizeof(orig));
+ if (orig.size() == 0 || orig[orig.size() - 1] != '\0') {
+ return false;
+ }
+ orig.truncate(orig.size() - 1);
+ memcpy(cell_ptr, &orig, sizeof(orig));
+ return true;
+}
+
template<typename ColIdxIter>
void SetKeyToMinValues(ColIdxIter first, ColIdxIter last, ContiguousRow* row) {
for (auto col_idx_it = first; col_idx_it != last; std::advance(col_idx_it, 1)) {
@@ -140,13 +203,11 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
const Schema& schema = *CHECK_NOTNULL(row->schema());
int pushed_predicates = 0;
- // Tracks whether the last pushed predicate is an equality or InList predicate.
- const ColumnPredicate* final_predicate = nullptr;
-
- // Step 1: copy predicates into the row in key column order, stopping after
- // the first range or missing predicate.
+ // Step 1: copy predicates into the row in key column order, stopping after the first missing
+ // predicate or range predicate which can't be transformed to an inclusive upper bound.
bool break_loop = false;
+ bool is_inclusive_bound = true;
for (auto col_idx_it = first; !break_loop && col_idx_it < last; std::advance(col_idx_it, 1)) {
const ColumnSchema& column = schema.column(*col_idx_it);
const ColumnPredicate* predicate = FindOrNull(predicates, column.name());
@@ -156,19 +217,20 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
case PredicateType::Equality:
memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_lower(), size);
pushed_predicates++;
- final_predicate = predicate;
break;
case PredicateType::Range:
if (predicate->raw_upper() != nullptr) {
memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_upper(), size);
pushed_predicates++;
- final_predicate = predicate;
+ // Try to decrease because upper bound is exclusive.
+ if (!TryDecrementCell(row->schema()->column(*col_idx_it),
+ row->mutable_cell_ptr(*col_idx_it))) {
+ is_inclusive_bound = false;
+ break_loop = true;
+ }
+ } else {
+ break_loop = true;
}
- // After the first column with a range constraint we stop pushing
- // constraints into the upper bound. Instead, we push minimum values
- // to the remaining columns (below), which is the maximally tight
- // constraint.
- break_loop = true;
break;
case PredicateType::IsNotNull:
break_loop = true;
@@ -179,7 +241,6 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
DCHECK(!predicate->raw_values().empty());
memcpy(row->mutable_cell_ptr(*col_idx_it), predicate->raw_values().back(), size);
pushed_predicates++;
- final_predicate = predicate;
break;
case PredicateType::None:
LOG(FATAL) << "NONE predicate can not be pushed into key";
@@ -189,10 +250,8 @@ int PushUpperBoundKeyPredicates(ColIdxIter first,
// If no predicates were pushed, no need to do any more work.
if (pushed_predicates == 0) { return 0; }
- // Step 2: If the final predicate is an equality predicate or an InList predicate,
- // increment the key to convert it to an exclusive upper bound.
- if (final_predicate->predicate_type() == PredicateType::Equality
- || final_predicate->predicate_type() == PredicateType::InList) {
+ // Step 2: If the upper bound is inclusive, increment it to become exclusive.
+ if (is_inclusive_bound) {
if (!IncrementKey(first, std::next(first, pushed_predicates), row, arena)) {
// If the increment fails then this bound is is not constraining the keyspace.
return 0;
@@ -295,6 +354,35 @@ bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena) {
#undef HANDLE_TYPE
}
+bool TryDecrementCell(const ColumnSchema &col, void *cell_ptr) {
+ DataType type = col.type_info()->physical_type();
+ switch (type) {
+ case BOOL:
+ return DecrementBoolCell(cell_ptr);
+#define HANDLE_TYPE(t) case t: return DecrementIntCell<t>(cell_ptr);
+ HANDLE_TYPE(UINT8);
+ HANDLE_TYPE(UINT16);
+ HANDLE_TYPE(UINT32);
+ HANDLE_TYPE(UINT64);
+ HANDLE_TYPE(INT8);
+ HANDLE_TYPE(INT16);
+ HANDLE_TYPE(INT32);
+ HANDLE_TYPE(UNIXTIME_MICROS);
+ HANDLE_TYPE(INT64);
+ case FLOAT:
+ return DecrementFloatingPointCell<FLOAT>(cell_ptr);
+ case DOUBLE:
+ return DecrementFloatingPointCell<DOUBLE>(cell_ptr);
+ case STRING:
+ case BINARY:
+ return DecrementStringCell(cell_ptr);
+ case UNKNOWN_DATA:
+ default: LOG(FATAL) << "Unknown data type: " << type;
+ }
+ return false; // unreachable
+#undef HANDLE_TYPE
+}
+
int PushLowerBoundKeyPredicates(const vector<int32_t>& col_idxs,
const unordered_map<string, ColumnPredicate>& predicates,
ContiguousRow* row,
http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/key_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_util.h b/src/kudu/common/key_util.h
index 8dc60f4..70da7c6 100644
--- a/src/kudu/common/key_util.h
+++ b/src/kudu/common/key_util.h
@@ -60,6 +60,9 @@ bool IncrementPrimaryKey(ContiguousRow* row, Arena* arena) WARN_UNUSED_RESULT;
// Increments the provided cell in place.
bool IncrementCell(const ColumnSchema& col, void* cell_ptr, Arena* arena);
+// Decrements the provided cell. If fail, the result will not be updated.
+bool TryDecrementCell(const ColumnSchema &col, void *cell_ptr);
+
// Pushes lower bound key predicates into the row. Returns the number of pushed
// predicates. Unpushed predicate columns will be set to the minimum value
// (unless no predicates are pushed at all).
http://git-wip-us.apache.org/repos/asf/kudu/blob/443b724e/src/kudu/common/scan_spec-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/scan_spec-test.cc b/src/kudu/common/scan_spec-test.cc
index bf40be7..7798854 100644
--- a/src/kudu/common/scan_spec-test.cc
+++ b/src/kudu/common/scan_spec-test.cc
@@ -46,7 +46,8 @@ class TestScanSpec : public KuduTest {
enum ComparisonOp {
GE,
EQ,
- LE
+ LE,
+ LT
};
template<class T>
@@ -68,7 +69,10 @@ class TestScanSpec : public KuduTest {
auto p = ColumnPredicate::InclusiveRange(schema_.column(idx), nullptr, val_void, &arena_);
if (p) spec->AddPredicate(*p);
break;
- };
+ }
+ case LT:
+ spec->AddPredicate(ColumnPredicate::Range(schema_.column(idx), nullptr, val_void));
+ break;
}
}
@@ -193,7 +197,7 @@ TEST_F(CompositeIntKeysTest, TestConsecutiveUpperRangePredicates) {
AddPredicate<int8_t>(&spec, "c", LE, 5);
SCOPED_TRACE(spec.ToString(schema_));
spec.OptimizeScan(schema_, &arena_, &pool_, true);
- EXPECT_EQ("PK < (int8 a=4, int8 b=-128, int8 c=-128) AND `b` < 5 AND `c` < 6",
+ EXPECT_EQ("PK < (int8 a=3, int8 b=4, int8 c=6) AND `b` < 5 AND `c` < 6",
spec.ToString(schema_));
}
@@ -221,7 +225,7 @@ TEST_F(CompositeIntKeysTest, TestEqualityAndConsecutiveRangePredicates) {
SCOPED_TRACE(spec.ToString(schema_));
spec.OptimizeScan(schema_, &arena_, &pool_, true);
EXPECT_EQ("PK >= (int8 a=3, int8 b=4, int8 c=5) AND "
- "PK < (int8 a=3, int8 b=15, int8 c=-128) AND "
+ "PK < (int8 a=3, int8 b=14, int8 c=16) AND "
"`c` >= 5 AND `c` < 16", spec.ToString(schema_));
}
@@ -362,7 +366,7 @@ TEST_F(CompositeIntKeysTest, TestInListPushdownWithRange) {
SCOPED_TRACE(spec.ToString(schema_));
spec.OptimizeScan(schema_, &arena_, &pool_, true);
EXPECT_EQ("PK >= (int8 a=10, int8 b=50, int8 c=-128) AND "
- "PK < (int8 a=101, int8 b=-128, int8 c=-128) AND "
+ "PK < (int8 a=100, int8 b=101, int8 c=-128) AND "
"`b` IN (50, 100)",
spec.ToString(schema_));
@@ -694,6 +698,53 @@ TEST_F(CompositeIntStringKeysTest, TestPrefixEqualityWithString) {
spec.ToString(schema_));
}
+TEST_F(CompositeIntStringKeysTest, TestDecreaseUpperBoundKey) {
+ {
+ ScanSpec spec;
+ AddPredicate<int8_t>(&spec, "a", LT, 64);
+ AddPredicate<Slice>(&spec, "b", LT, Slice("abc"));
+ AddPredicate<Slice>(&spec, "c", LT, Slice("def\0", 4));
+ SCOPED_TRACE(spec.ToString(schema_));
+ spec.OptimizeScan(schema_, &arena_, &pool_, true);
+ EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="") AND )"
+ R"(`b` < "abc" AND `c` < "def\000")",
+ spec.ToString(schema_));
+ }
+ {
+ ScanSpec spec;
+ AddPredicate<int8_t>(&spec, "a", LT, 64);
+ AddPredicate<Slice>(&spec, "b", LT, Slice("abc\0", 4));
+ AddPredicate<Slice>(&spec, "c", LT, Slice("def"));
+ SCOPED_TRACE(spec.ToString(schema_));
+ spec.OptimizeScan(schema_, &arena_, &pool_, true);
+ EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="def") AND )"
+ R"(`b` < "abc\000" AND `c` < "def")",
+ spec.ToString(schema_));
+ }
+ {
+ ScanSpec spec;
+ AddPredicate<int8_t>(&spec, "a", LT, 64);
+ AddPredicate<Slice>(&spec, "b", LT, Slice("abc\0", 4));
+ AddPredicate<Slice>(&spec, "c", LT, Slice("def\0", 4));
+ SCOPED_TRACE(spec.ToString(schema_));
+ spec.OptimizeScan(schema_, &arena_, &pool_, true);
+ EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="def\000") AND )"
+ R"(`b` < "abc\000" AND `c` < "def\000")",
+ spec.ToString(schema_));
+ }
+ {
+ ScanSpec spec;
+ AddPredicate<int8_t>(&spec, "a", LT, 64);
+ AddPredicate<Slice>(&spec, "b", LT, Slice("abc"));
+ AddPredicate<Slice>(&spec, "c", LT, Slice("def"));
+ SCOPED_TRACE(spec.ToString(schema_));
+ spec.OptimizeScan(schema_, &arena_, &pool_, true);
+ EXPECT_EQ(R"(PK < (int8 a=63, string b="abc", string c="") AND )"
+ R"(`b` < "abc" AND `c` < "def")",
+ spec.ToString(schema_));
+ }
+}
+
// Tests for non-composite int key
//------------------------------------------------------------
class SingleIntKeyTest : public TestScanSpec {