You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/06/07 23:49:34 UTC

[kudu] branch master updated: schema: cache first is_deleted virtual column index

This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new bf2b3e8  schema: cache first is_deleted virtual column index
bf2b3e8 is described below

commit bf2b3e8d81b3a137a279cbf808badf1eaa8d9d9c
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Jun 6 22:35:04 2019 -0700

    schema: cache first is_deleted virtual column index
    
    This mirrors what we do in Java, and will make it easier to implement
    KuduScanBatch::RowPtr::IsDeleted. On the other hand, it complicates Schema
    somewhat, so is it really worth it?
    
    Change-Id: I018daa0c432f909942d5107df1acab3477788683
    Reviewed-on: http://gerrit.cloudera.org:8080/13554
    Tested-by: Kudu Jenkins
    Reviewed-by: Mike Percy <mp...@apache.org>
---
 src/kudu/common/generic_iterators-test.cc |  8 +++---
 src/kudu/common/generic_iterators.cc      | 15 ++++------
 src/kudu/common/schema-test.cc            | 24 ++++++++--------
 src/kudu/common/schema.cc                 | 47 +++++++++++++++++++++++++------
 src/kudu/common/schema.h                  | 24 +++++-----------
 src/kudu/master/master-test.cc            |  2 ++
 src/kudu/tablet/delta_store.cc            |  3 +-
 src/kudu/tablet/delta_store.h             |  4 ---
 src/kudu/tablet/memrowset.cc              |  2 +-
 src/kudu/tablet/tablet-test-util.h        |  4 +--
 10 files changed, 72 insertions(+), 61 deletions(-)

diff --git a/src/kudu/common/generic_iterators-test.cc b/src/kudu/common/generic_iterators-test.cc
index d170ffa..190dff4 100644
--- a/src/kudu/common/generic_iterators-test.cc
+++ b/src/kudu/common/generic_iterators-test.cc
@@ -399,8 +399,6 @@ void TestMerge(const Schema& schema,
 
   VLOG(1) << "Predicate expects " << expected.size() << " results: " << expected;
 
-  const int kIsDeletedIndex = schema.find_first_is_deleted_virtual_column();
-
   for (int trial = 0; trial < FLAGS_num_iters; trial++) {
     vector<IterWithBounds> to_merge;
     for (const auto& list : all_ints) {
@@ -454,8 +452,10 @@ void TestMerge(const Schema& schema,
           EXPECT_EQ(expected_key, row_key) << "Yielded out of order at idx " << total_idx;
           bool is_deleted = false;
           if (include_deleted_rows) {
-            CHECK_NE(Schema::kColumnNotFound, kIsDeletedIndex);
-            is_deleted = *schema.ExtractColumnFromRow<IS_DELETED>(dst.row(i), kIsDeletedIndex);
+            CHECK_NE(Schema::kColumnNotFound,
+                     schema.first_is_deleted_virtual_column_idx());
+            is_deleted = *schema.ExtractColumnFromRow<IS_DELETED>(
+                dst.row(i), schema.first_is_deleted_virtual_column_idx());
             EXPECT_EQ(expected_is_deleted, is_deleted)
                 << "Row " << row_key << " has unexpected IS_DELETED value at index " << total_idx;
           }
diff --git a/src/kudu/common/generic_iterators.cc b/src/kudu/common/generic_iterators.cc
index 8752ab5..3c1c987 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -507,10 +507,6 @@ class MergeIterator : public RowwiseIterator {
 
   bool initted_;
 
-  // Index of the IS_DELETED column, or Schema::kColumnNotFound if no such
-  // column exists in the schema.
-  int is_deleted_col_index_;
-
   // Holds the sub-iterators until Init is called, at which point this is
   // cleared. This is required because we can't create a MergeIterState of an
   // uninitialized sub-iterator.
@@ -629,8 +625,8 @@ Status MergeIterator::Init(ScanSpec *spec) {
   }
 #endif
 
-  is_deleted_col_index_ = schema_->find_first_is_deleted_virtual_column();
-  if (opts_.include_deleted_rows && is_deleted_col_index_ == Schema::kColumnNotFound) {
+  if (opts_.include_deleted_rows &&
+      schema_->first_is_deleted_virtual_column_idx() == Schema::kColumnNotFound) {
     return Status::InvalidArgument("Merge iterator cannot deduplicate deleted "
                                    "rows without an IS_DELETED column");
   }
@@ -865,7 +861,8 @@ Status MergeIterator::MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx) {
     int live_rows_found = 0;
     for (const auto& s : smallest) {
       bool is_deleted =
-          *schema_->ExtractColumnFromRow<IS_DELETED>(s->next_row(), is_deleted_col_index_);
+          *schema_->ExtractColumnFromRow<IS_DELETED>(
+              s->next_row(), schema_->first_is_deleted_virtual_column_idx());
       if (!is_deleted) {
         // We found the single live instance of the row.
         row_to_return_iter = s;
@@ -882,8 +879,8 @@ Status MergeIterator::MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx) {
     // deleted instance.
     if (row_to_return_iter == nullptr) {
       row_to_return_iter = smallest[0];
-      DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(row_to_return_iter->next_row(),
-                                                        is_deleted_col_index_))
+      DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(
+          row_to_return_iter->next_row(), schema_->first_is_deleted_virtual_column_idx()))
           << "expected deleted row";
     }
   }
diff --git a/src/kudu/common/schema-test.cc b/src/kudu/common/schema-test.cc
index 0d26bbb..16e4dcd 100644
--- a/src/kudu/common/schema-test.cc
+++ b/src/kudu/common/schema-test.cc
@@ -475,21 +475,21 @@ TEST_F(TestSchema, TestGetMappedReadProjection) {
 
   // Ensure that virtual columns that are nullable or that do not have read
   // defaults are rejected.
-  Schema nullable_projection({ ColumnSchema("key", STRING),
-                               ColumnSchema("deleted", IS_DELETED,
-                                            /*is_nullable=*/true,
-                                            /*read_default=*/&kReadDefault) },
-                          1);
-  Status s = tablet_schema.GetMappedReadProjection(nullable_projection, &mapped);
+  Schema nullable_projection;
+  Status s = nullable_projection.Reset({ ColumnSchema("key", STRING),
+                                         ColumnSchema("deleted", IS_DELETED,
+                                                      /*is_nullable=*/true,
+                                                      /*read_default=*/&kReadDefault) },
+                                       1);
   ASSERT_FALSE(s.ok());
   ASSERT_STR_CONTAINS(s.ToString(), "must not be nullable");
 
-  Schema no_default_projection({ ColumnSchema("key", STRING),
-                                 ColumnSchema("deleted", IS_DELETED,
-                                              /*is_nullable=*/false,
-                                              /*read_default=*/nullptr) },
-                               1);
-  s = tablet_schema.GetMappedReadProjection(no_default_projection, &mapped);
+  Schema no_default_projection;
+  s = no_default_projection.Reset({ ColumnSchema("key", STRING),
+                                    ColumnSchema("deleted", IS_DELETED,
+                                                 /*is_nullable=*/false,
+                                                 /*read_default=*/nullptr) },
+                                  1);
   ASSERT_FALSE(s.ok());
   ASSERT_STR_CONTAINS(s.ToString(), "must have a default value for read");
 }
diff --git a/src/kudu/common/schema.cc b/src/kudu/common/schema.cc
index 8a5509e..65659c3 100644
--- a/src/kudu/common/schema.cc
+++ b/src/kudu/common/schema.cc
@@ -50,6 +50,36 @@ static const ColumnId kFirstColumnId(0);
 static const ColumnId  kFirstColumnId(10);
 #endif
 
+namespace {
+
+Status FindFirstIsDeletedVirtualColumnIdx(
+    const vector<ColumnSchema>& cols, int* idx) {
+  for (int i = 0; i < cols.size(); i++) {
+    const auto& col = cols[i];
+    if (col.type_info()->type() == IS_DELETED) {
+      // Enforce some properties on the virtual column that simplify our
+      // implementation.
+      // TODO(KUDU-2692): Consider removing these requirements.
+      if (col.is_nullable()) {
+        return Status::InvalidArgument(Substitute(
+            "Virtual column $0 $1 must not be nullable",
+            col.name(), col.TypeToString()));
+      }
+      if (!col.has_read_default()) {
+        return Status::InvalidArgument(Substitute(
+            "Virtual column $0 $1 must have a default value for read",
+            col.name(), col.TypeToString()));
+      }
+      *idx = i;
+      return Status::OK();
+    }
+  }
+  *idx = Schema::kColumnNotFound;
+  return Status::OK();
+}
+
+} // anonymous namespace
+
 bool ColumnTypeAttributes::EqualsForType(ColumnTypeAttributes other,
                                          DataType type) const {
   switch (type) {
@@ -189,6 +219,7 @@ void Schema::CopyFrom(const Schema& other) {
     name_to_index_[col.name()] = i++;
   }
 
+  first_is_deleted_virtual_column_idx_ = other.first_is_deleted_virtual_column_idx_;
   has_nullables_ = other.has_nullables_;
 }
 
@@ -204,6 +235,7 @@ Schema::Schema(Schema&& other) noexcept
                      NameToIndexMap::key_equal(),
                      NameToIndexMapAllocator(&name_to_index_bytes_)),
       id_to_index_(std::move(other.id_to_index_)),
+      first_is_deleted_virtual_column_idx_(other.first_is_deleted_virtual_column_idx_),
       has_nullables_(other.has_nullables_) {
   // 'name_to_index_' uses a customer allocator which holds a pointer to
   // 'name_to_index_bytes_'. swap() will swap the contents but not the
@@ -226,6 +258,7 @@ Schema& Schema::operator=(Schema&& other) noexcept {
     max_col_id_ = other.max_col_id_;
     col_offsets_ = std::move(other.col_offsets_);
     id_to_index_ = std::move(other.id_to_index_);
+    first_is_deleted_virtual_column_idx_ = other.first_is_deleted_virtual_column_idx_;
     has_nullables_ = other.has_nullables_;
 
     // See the comment in the move constructor implementation for why we swap.
@@ -295,6 +328,9 @@ Status Schema::Reset(const vector<ColumnSchema>& cols,
     id_to_index_.set(ids[i], i);
   }
 
+  RETURN_NOT_OK(FindFirstIsDeletedVirtualColumnIdx(
+      cols_, &first_is_deleted_virtual_column_idx_));
+
   // Determine whether any column is nullable
   has_nullables_ = false;
   for (const ColumnSchema& col : cols_) {
@@ -406,15 +442,8 @@ Status Schema::GetMappedReadProjection(const Schema& projection,
     int index = find_column(col.name());
     if (col.type_info()->is_virtual()) {
       DCHECK_EQ(kColumnNotFound, index) << "virtual column not expected in tablet schema";
-      if (col.is_nullable()) {
-        return Status::InvalidArgument(Substitute("Virtual column $0 $1 must not be nullable",
-                                                  col.name(), col.TypeToString()));
-      }
-      if (!col.has_read_default()) {
-        return Status::InvalidArgument(Substitute("Virtual column $0 $1 must "
-                                                  "have a default value for read",
-                                                  col.name(), col.TypeToString()));
-      }
+      DCHECK(!col.is_nullable()); // enforced by Schema constructor
+      DCHECK(col.has_read_default()); // enforced by Schema constructor
       mapped_cols.push_back(col);
       // Generate a "fake" column id for virtual columns.
       mapped_ids.emplace_back(++proj_max_col_id);
diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h
index 307ac3b..2466c13 100644
--- a/src/kudu/common/schema.h
+++ b/src/kudu/common/schema.h
@@ -446,6 +446,7 @@ class Schema {
                      NameToIndexMap::hasher(),
                      NameToIndexMap::key_equal(),
                      NameToIndexMapAllocator(&name_to_index_bytes_)),
+      first_is_deleted_virtual_column_idx_(kColumnNotFound),
       has_nullables_(false) {
   }
 
@@ -919,23 +920,8 @@ class Schema {
 
   // Returns the column index for the first IS_DELETED virtual column in the
   // schema, or kColumnNotFound if one cannot be found.
-  //
-  // The virtual column must not be nullable and must have a read default value.
-  // The process will crash if these constraints are not met.
-  int find_first_is_deleted_virtual_column() const {
-    for (int idx = 0; idx < num_columns(); idx++) {
-      const auto& col = column(idx);
-      if (col.type_info()->type() == IS_DELETED) {
-        // Enforce some properties on the virtual column that simplify our
-        // implementation.
-        // TODO(KUDU-2692): Consider removing these requirements.
-        DCHECK(!col.is_nullable());
-        DCHECK(col.has_read_default());
-
-        return idx;
-      }
-    }
-    return kColumnNotFound;
+  int first_is_deleted_virtual_column_idx() const {
+    return first_is_deleted_virtual_column_idx_;
   }
 
  private:
@@ -984,6 +970,10 @@ class Schema {
 
   IdMapping id_to_index_;
 
+  // Cached index of the first IS_DELETED virtual column, or kColumnNotFound if
+  // no such virtual column exists in the schema.
+  int first_is_deleted_virtual_column_idx_;
+
   // Cached indicator whether any columns are nullable.
   bool has_nullables_;
 
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index d7fd360..045f144 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -824,6 +824,8 @@ TEST_F(MasterTest, TestVirtualColumns) {
   col = req.mutable_schema()->add_columns();
   col->set_name("bar");
   col->set_type(IS_DELETED);
+  bool read_default = true;
+  col->set_read_default_value(&read_default, sizeof(read_default));
 
   ASSERT_OK(proxy_->CreateTable(req, &resp, &controller));
   SCOPED_TRACE(SecureDebugString(resp));
diff --git a/src/kudu/tablet/delta_store.cc b/src/kudu/tablet/delta_store.cc
index 636ad8a..3fd7e52 100644
--- a/src/kudu/tablet/delta_store.cc
+++ b/src/kudu/tablet/delta_store.cc
@@ -66,7 +66,6 @@ string DeltaKeyAndUpdate::Stringify(DeltaType type, const Schema& schema, bool p
 template<class Traits>
 DeltaPreparer<Traits>::DeltaPreparer(RowIteratorOptions opts)
     : opts_(std::move(opts)),
-      projection_vc_is_deleted_idx_(opts_.projection->find_first_is_deleted_virtual_column()),
       cur_prepared_idx_(0),
       prev_prepared_idx_(0),
       prepared_flags_(DeltaIterator::PREPARE_NONE),
@@ -236,7 +235,7 @@ Status DeltaPreparer<Traits>::ApplyUpdates(size_t col_to_apply, ColumnBlock* dst
 
   // Special handling for the IS_DELETED virtual column: convert 'deleted_' and
   // 'reinserted_' into true and false cell values.
-  if (col_to_apply == projection_vc_is_deleted_idx_) {
+  if (col_to_apply == opts_.projection->first_is_deleted_virtual_column_idx()) {
     // See ApplyDeletes() to understand why we adjust the virtual column's value
     // for both deleted and reinserted rows.
     for (const auto& row_id : deleted_) {
diff --git a/src/kudu/tablet/delta_store.h b/src/kudu/tablet/delta_store.h
index 63d0657..9333581 100644
--- a/src/kudu/tablet/delta_store.h
+++ b/src/kudu/tablet/delta_store.h
@@ -351,10 +351,6 @@ class DeltaPreparer : public PreparedDeltas {
   // Options with which the DeltaPreparer's iterator was constructed.
   const RowIteratorOptions opts_;
 
-  // The index of the first IS_DELETED virtual column in the projection schema,
-  // or kColumnNotFound if one doesn't exist.
-  const int projection_vc_is_deleted_idx_;
-
   // The row index at which the most recent batch preparation ended.
   rowid_t cur_prepared_idx_;
 
diff --git a/src/kudu/tablet/memrowset.cc b/src/kudu/tablet/memrowset.cc
index ec96c6f..9f56851 100644
--- a/src/kudu/tablet/memrowset.cc
+++ b/src/kudu/tablet/memrowset.cc
@@ -396,7 +396,7 @@ MemRowSet::Iterator::Iterator(const std::shared_ptr<const MemRowSet>& mrs,
       projector_(
           GenerateAppropriateProjector(&mrs->schema_nonvirtual(), opts_.projection)),
       delta_projector_(&mrs->schema_nonvirtual(), opts_.projection),
-      projection_vc_is_deleted_idx_(opts_.projection->find_first_is_deleted_virtual_column()),
+      projection_vc_is_deleted_idx_(opts_.projection->first_is_deleted_virtual_column_idx()),
       state_(kUninitialized) {
   // TODO(todd): various code assumes that a newly constructed iterator
   // is pointed at the beginning of the dataset. This causes a redundant
diff --git a/src/kudu/tablet/tablet-test-util.h b/src/kudu/tablet/tablet-test-util.h
index 62167d5..07ddb66 100644
--- a/src/kudu/tablet/tablet-test-util.h
+++ b/src/kudu/tablet/tablet-test-util.h
@@ -902,8 +902,6 @@ void RunDeltaFuzzTest(const DeltaStore& store,
     Schema projection = GetRandomProjection(*mirror->schema(), prng, kMaxColsToProject,
                                             lower_ts ? AllowIsDeleted::YES :
                                                        AllowIsDeleted::NO);
-    size_t projection_vc_is_deleted_idx =
-        projection.find_first_is_deleted_virtual_column();
     SCOPED_TRACE(Substitute("Projection $0", projection.ToString()));
     RowIteratorOptions opts;
     opts.projection = &projection;
@@ -979,7 +977,7 @@ void RunDeltaFuzzTest(const DeltaStore& store,
           actual_scb[k] = 0;
         }
         const SelectionVector& filter = lower_ts ? actual_selected : actual_deleted;
-        if (j == projection_vc_is_deleted_idx) {
+        if (j == opts.projection->first_is_deleted_virtual_column_idx()) {
           // Reconstruct the expected IS_DELETED state using 'actual_selected'
           // and 'actual_deleted', which we've already verified above.
           DCHECK(lower_ts);