You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/06/07 23:49:34 UTC
[kudu] branch master updated: schema: cache first is_deleted
virtual column index
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new bf2b3e8 schema: cache first is_deleted virtual column index
bf2b3e8 is described below
commit bf2b3e8d81b3a137a279cbf808badf1eaa8d9d9c
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Jun 6 22:35:04 2019 -0700
schema: cache first is_deleted virtual column index
This mirrors what we do in Java, and will make it easier to implement
KuduScanBatch::RowPtr::IsDeleted. On the other hand, it complicates Schema
somewhat, so is it really worth it?
Change-Id: I018daa0c432f909942d5107df1acab3477788683
Reviewed-on: http://gerrit.cloudera.org:8080/13554
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>
---
src/kudu/common/generic_iterators-test.cc | 8 +++---
src/kudu/common/generic_iterators.cc | 15 ++++------
src/kudu/common/schema-test.cc | 24 ++++++++--------
src/kudu/common/schema.cc | 47 +++++++++++++++++++++++++------
src/kudu/common/schema.h | 24 +++++-----------
src/kudu/master/master-test.cc | 2 ++
src/kudu/tablet/delta_store.cc | 3 +-
src/kudu/tablet/delta_store.h | 4 ---
src/kudu/tablet/memrowset.cc | 2 +-
src/kudu/tablet/tablet-test-util.h | 4 +--
10 files changed, 72 insertions(+), 61 deletions(-)
diff --git a/src/kudu/common/generic_iterators-test.cc b/src/kudu/common/generic_iterators-test.cc
index d170ffa..190dff4 100644
--- a/src/kudu/common/generic_iterators-test.cc
+++ b/src/kudu/common/generic_iterators-test.cc
@@ -399,8 +399,6 @@ void TestMerge(const Schema& schema,
VLOG(1) << "Predicate expects " << expected.size() << " results: " << expected;
- const int kIsDeletedIndex = schema.find_first_is_deleted_virtual_column();
-
for (int trial = 0; trial < FLAGS_num_iters; trial++) {
vector<IterWithBounds> to_merge;
for (const auto& list : all_ints) {
@@ -454,8 +452,10 @@ void TestMerge(const Schema& schema,
EXPECT_EQ(expected_key, row_key) << "Yielded out of order at idx " << total_idx;
bool is_deleted = false;
if (include_deleted_rows) {
- CHECK_NE(Schema::kColumnNotFound, kIsDeletedIndex);
- is_deleted = *schema.ExtractColumnFromRow<IS_DELETED>(dst.row(i), kIsDeletedIndex);
+ CHECK_NE(Schema::kColumnNotFound,
+ schema.first_is_deleted_virtual_column_idx());
+ is_deleted = *schema.ExtractColumnFromRow<IS_DELETED>(
+ dst.row(i), schema.first_is_deleted_virtual_column_idx());
EXPECT_EQ(expected_is_deleted, is_deleted)
<< "Row " << row_key << " has unexpected IS_DELETED value at index " << total_idx;
}
diff --git a/src/kudu/common/generic_iterators.cc b/src/kudu/common/generic_iterators.cc
index 8752ab5..3c1c987 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -507,10 +507,6 @@ class MergeIterator : public RowwiseIterator {
bool initted_;
- // Index of the IS_DELETED column, or Schema::kColumnNotFound if no such
- // column exists in the schema.
- int is_deleted_col_index_;
-
// Holds the sub-iterators until Init is called, at which point this is
// cleared. This is required because we can't create a MergeIterState of an
// uninitialized sub-iterator.
@@ -629,8 +625,8 @@ Status MergeIterator::Init(ScanSpec *spec) {
}
#endif
- is_deleted_col_index_ = schema_->find_first_is_deleted_virtual_column();
- if (opts_.include_deleted_rows && is_deleted_col_index_ == Schema::kColumnNotFound) {
+ if (opts_.include_deleted_rows &&
+ schema_->first_is_deleted_virtual_column_idx() == Schema::kColumnNotFound) {
return Status::InvalidArgument("Merge iterator cannot deduplicate deleted "
"rows without an IS_DELETED column");
}
@@ -865,7 +861,8 @@ Status MergeIterator::MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx) {
int live_rows_found = 0;
for (const auto& s : smallest) {
bool is_deleted =
- *schema_->ExtractColumnFromRow<IS_DELETED>(s->next_row(), is_deleted_col_index_);
+ *schema_->ExtractColumnFromRow<IS_DELETED>(
+ s->next_row(), schema_->first_is_deleted_virtual_column_idx());
if (!is_deleted) {
// We found the single live instance of the row.
row_to_return_iter = s;
@@ -882,8 +879,8 @@ Status MergeIterator::MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx) {
// deleted instance.
if (row_to_return_iter == nullptr) {
row_to_return_iter = smallest[0];
- DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(row_to_return_iter->next_row(),
- is_deleted_col_index_))
+ DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(
+ row_to_return_iter->next_row(), schema_->first_is_deleted_virtual_column_idx()))
<< "expected deleted row";
}
}
diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index 0d26bbb..16e4dcd 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -475,21 +475,21 @@ TEST_F(TestSchema, TestGetMappedReadProjection) {
// Ensure that virtual columns that are nullable or that do not have read
// defaults are rejected.
- Schema nullable_projection({ ColumnSchema("key", STRING),
- ColumnSchema("deleted", IS_DELETED,
- /*is_nullable=*/true,
- /*read_default=*/&kReadDefault) },
- 1);
- Status s = tablet_schema.GetMappedReadProjection(nullable_projection, &mapped);
+ Schema nullable_projection;
+ Status s = nullable_projection.Reset({ ColumnSchema("key", STRING),
+ ColumnSchema("deleted", IS_DELETED,
+ /*is_nullable=*/true,
+ /*read_default=*/&kReadDefault) },
+ 1);
ASSERT_FALSE(s.ok());
ASSERT_STR_CONTAINS(s.ToString(), "must not be nullable");
- Schema no_default_projection({ ColumnSchema("key", STRING),
- ColumnSchema("deleted", IS_DELETED,
- /*is_nullable=*/false,
- /*read_default=*/nullptr) },
- 1);
- s = tablet_schema.GetMappedReadProjection(no_default_projection, &mapped);
+ Schema no_default_projection;
+ s = no_default_projection.Reset({ ColumnSchema("key", STRING),
+ ColumnSchema("deleted", IS_DELETED,
+ /*is_nullable=*/false,
+ /*read_default=*/nullptr) },
+ 1);
ASSERT_FALSE(s.ok());
ASSERT_STR_CONTAINS(s.ToString(), "must have a default value for read");
}
diff --git a/src/kudu/common/schema.cc b/src/kudu/common/schema.cc
index 8a5509e..65659c3 100644
--- a/src/kudu/common/schema.cc
+++ b/src/kudu/common/schema.cc
@@ -50,6 +50,36 @@ static const ColumnId kFirstColumnId(0);
static const ColumnId kFirstColumnId(10);
#endif
+namespace {
+
+Status FindFirstIsDeletedVirtualColumnIdx(
+ const vector<ColumnSchema>& cols, int* idx) {
+ for (int i = 0; i < cols.size(); i++) {
+ const auto& col = cols[i];
+ if (col.type_info()->type() == IS_DELETED) {
+ // Enforce some properties on the virtual column that simplify our
+ // implementation.
+ // TODO(KUDU-2692): Consider removing these requirements.
+ if (col.is_nullable()) {
+ return Status::InvalidArgument(Substitute(
+ "Virtual column $0 $1 must not be nullable",
+ col.name(), col.TypeToString()));
+ }
+ if (!col.has_read_default()) {
+ return Status::InvalidArgument(Substitute(
+ "Virtual column $0 $1 must have a default value for read",
+ col.name(), col.TypeToString()));
+ }
+ *idx = i;
+ return Status::OK();
+ }
+ }
+ *idx = Schema::kColumnNotFound;
+ return Status::OK();
+}
+
+} // anonymous namespace
+
bool ColumnTypeAttributes::EqualsForType(ColumnTypeAttributes other,
DataType type) const {
switch (type) {
@@ -189,6 +219,7 @@ void Schema::CopyFrom(const Schema& other) {
name_to_index_[col.name()] = i++;
}
+ first_is_deleted_virtual_column_idx_ = other.first_is_deleted_virtual_column_idx_;
has_nullables_ = other.has_nullables_;
}
@@ -204,6 +235,7 @@ Schema::Schema(Schema&& other) noexcept
NameToIndexMap::key_equal(),
NameToIndexMapAllocator(&name_to_index_bytes_)),
id_to_index_(std::move(other.id_to_index_)),
+ first_is_deleted_virtual_column_idx_(other.first_is_deleted_virtual_column_idx_),
has_nullables_(other.has_nullables_) {
// 'name_to_index_' uses a customer allocator which holds a pointer to
// 'name_to_index_bytes_'. swap() will swap the contents but not the
@@ -226,6 +258,7 @@ Schema& Schema::operator=(Schema&& other) noexcept {
max_col_id_ = other.max_col_id_;
col_offsets_ = std::move(other.col_offsets_);
id_to_index_ = std::move(other.id_to_index_);
+ first_is_deleted_virtual_column_idx_ = other.first_is_deleted_virtual_column_idx_;
has_nullables_ = other.has_nullables_;
// See the comment in the move constructor implementation for why we swap.
@@ -295,6 +328,9 @@ Status Schema::Reset(const vector<ColumnSchema>& cols,
id_to_index_.set(ids[i], i);
}
+ RETURN_NOT_OK(FindFirstIsDeletedVirtualColumnIdx(
+ cols_, &first_is_deleted_virtual_column_idx_));
+
// Determine whether any column is nullable
has_nullables_ = false;
for (const ColumnSchema& col : cols_) {
@@ -406,15 +442,8 @@ Status Schema::GetMappedReadProjection(const Schema& projection,
int index = find_column(col.name());
if (col.type_info()->is_virtual()) {
DCHECK_EQ(kColumnNotFound, index) << "virtual column not expected in tablet schema";
- if (col.is_nullable()) {
- return Status::InvalidArgument(Substitute("Virtual column $0 $1 must not be nullable",
- col.name(), col.TypeToString()));
- }
- if (!col.has_read_default()) {
- return Status::InvalidArgument(Substitute("Virtual column $0 $1 must "
- "have a default value for read",
- col.name(), col.TypeToString()));
- }
+ DCHECK(!col.is_nullable()); // enforced by Schema constructor
+ DCHECK(col.has_read_default()); // enforced by Schema constructor
mapped_cols.push_back(col);
// Generate a "fake" column id for virtual columns.
mapped_ids.emplace_back(++proj_max_col_id);
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index 307ac3b..2466c13 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -446,6 +446,7 @@ class Schema {
NameToIndexMap::hasher(),
NameToIndexMap::key_equal(),
NameToIndexMapAllocator(&name_to_index_bytes_)),
+ first_is_deleted_virtual_column_idx_(kColumnNotFound),
has_nullables_(false) {
}
@@ -919,23 +920,8 @@ class Schema {
// Returns the column index for the first IS_DELETED virtual column in the
// schema, or kColumnNotFound if one cannot be found.
- //
- // The virtual column must not be nullable and must have a read default value.
- // The process will crash if these constraints are not met.
- int find_first_is_deleted_virtual_column() const {
- for (int idx = 0; idx < num_columns(); idx++) {
- const auto& col = column(idx);
- if (col.type_info()->type() == IS_DELETED) {
- // Enforce some properties on the virtual column that simplify our
- // implementation.
- // TODO(KUDU-2692): Consider removing these requirements.
- DCHECK(!col.is_nullable());
- DCHECK(col.has_read_default());
-
- return idx;
- }
- }
- return kColumnNotFound;
+ int first_is_deleted_virtual_column_idx() const {
+ return first_is_deleted_virtual_column_idx_;
}
private:
@@ -984,6 +970,10 @@ class Schema {
IdMapping id_to_index_;
+ // Cached index of the first IS_DELETED virtual column, or kColumnNotFound if
+ // no such virtual column exists in the schema.
+ int first_is_deleted_virtual_column_idx_;
+
// Cached indicator whether any columns are nullable.
bool has_nullables_;
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index d7fd360..045f144 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -824,6 +824,8 @@ TEST_F(MasterTest, TestVirtualColumns) {
col = req.mutable_schema()->add_columns();
col->set_name("bar");
col->set_type(IS_DELETED);
+ bool read_default = true;
+ col->set_read_default_value(&read_default, sizeof(read_default));
ASSERT_OK(proxy_->CreateTable(req, &resp, &controller));
SCOPED_TRACE(SecureDebugString(resp));
diff --git a/src/kudu/tablet/delta_store.cc b/src/kudu/tablet/delta_store.cc
index 636ad8a..3fd7e52 100644
--- a/src/kudu/tablet/delta_store.cc
+++ b/src/kudu/tablet/delta_store.cc
@@ -66,7 +66,6 @@ string DeltaKeyAndUpdate::Stringify(DeltaType type, const Schema& schema, bool p
template<class Traits>
DeltaPreparer<Traits>::DeltaPreparer(RowIteratorOptions opts)
: opts_(std::move(opts)),
- projection_vc_is_deleted_idx_(opts_.projection->find_first_is_deleted_virtual_column()),
cur_prepared_idx_(0),
prev_prepared_idx_(0),
prepared_flags_(DeltaIterator::PREPARE_NONE),
@@ -236,7 +235,7 @@ Status DeltaPreparer<Traits>::ApplyUpdates(size_t col_to_apply, ColumnBlock* dst
// Special handling for the IS_DELETED virtual column: convert 'deleted_' and
// 'reinserted_' into true and false cell values.
- if (col_to_apply == projection_vc_is_deleted_idx_) {
+ if (col_to_apply == opts_.projection->first_is_deleted_virtual_column_idx()) {
// See ApplyDeletes() to understand why we adjust the virtual column's value
// for both deleted and reinserted rows.
for (const auto& row_id : deleted_) {
diff --git a/src/kudu/tablet/delta_store.h b/src/kudu/tablet/delta_store.h
index 63d0657..9333581 100644
--- a/src/kudu/tablet/delta_store.h
+++ b/src/kudu/tablet/delta_store.h
@@ -351,10 +351,6 @@ class DeltaPreparer : public PreparedDeltas {
// Options with which the DeltaPreparer's iterator was constructed.
const RowIteratorOptions opts_;
- // The index of the first IS_DELETED virtual column in the projection schema,
- // or kColumnNotFound if one doesn't exist.
- const int projection_vc_is_deleted_idx_;
-
// The row index at which the most recent batch preparation ended.
rowid_t cur_prepared_idx_;
diff --git a/src/kudu/tablet/memrowset.cc b/src/kudu/tablet/memrowset.cc
index ec96c6f..9f56851 100644
--- a/src/kudu/tablet/memrowset.cc
+++ b/src/kudu/tablet/memrowset.cc
@@ -396,7 +396,7 @@ MemRowSet::Iterator::Iterator(const std::shared_ptr<const MemRowSet>& mrs,
projector_(
GenerateAppropriateProjector(&mrs->schema_nonvirtual(), opts_.projection)),
delta_projector_(&mrs->schema_nonvirtual(), opts_.projection),
- projection_vc_is_deleted_idx_(opts_.projection->find_first_is_deleted_virtual_column()),
+ projection_vc_is_deleted_idx_(opts_.projection->first_is_deleted_virtual_column_idx()),
state_(kUninitialized) {
// TODO(todd): various code assumes that a newly constructed iterator
// is pointed at the beginning of the dataset. This causes a redundant
diff --git a/src/kudu/tablet/tablet-test-util.h b/src/kudu/tablet/tablet-test-util.h
index 62167d5..07ddb66 100644
--- a/src/kudu/tablet/tablet-test-util.h
+++ b/src/kudu/tablet/tablet-test-util.h
@@ -902,8 +902,6 @@ void RunDeltaFuzzTest(const DeltaStore& store,
Schema projection = GetRandomProjection(*mirror->schema(), prng, kMaxColsToProject,
lower_ts ? AllowIsDeleted::YES :
AllowIsDeleted::NO);
- size_t projection_vc_is_deleted_idx =
- projection.find_first_is_deleted_virtual_column();
SCOPED_TRACE(Substitute("Projection $0", projection.ToString()));
RowIteratorOptions opts;
opts.projection = &projection;
@@ -979,7 +977,7 @@ void RunDeltaFuzzTest(const DeltaStore& store,
actual_scb[k] = 0;
}
const SelectionVector& filter = lower_ts ? actual_selected : actual_deleted;
- if (j == projection_vc_is_deleted_idx) {
+ if (j == opts.projection->first_is_deleted_virtual_column_idx()) {
// Reconstruct the expected IS_DELETED state using 'actual_selected'
// and 'actual_deleted', which we've already verified above.
DCHECK(lower_ts);