You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2016/04/08 08:06:18 UTC

incubator-kudu git commit: KUDU-777. Fix potential use-after-free after major delta compaction

Repository: incubator-kudu
Updated Branches:
  refs/heads/master 3339ce06b -> 1cda5dcd0


KUDU-777. Fix potential use-after-free after major delta compaction

This fixes the following race which could cause a crash:

- compaction policy is running on T1
- while constructing a RowSetTree, we call RowSet::GetBounds()
-- this calls base_data_->GetBounds() and returns Slices which point
   to storage owned by the CFileSet
- T2 completes a major delta compaction on the same rowset, which
  ends up destroying the original base_data_, thus invalidating the
  slices
- T1 continues and tries to access the slices, causing use-after-free

The fix is the most straight-forward one: we change GetBounds() to return
std::strings instead of Slices. There might be a small perf difference here,
but this call never shows up on hot paths like read or write operations.

I looped build/latest/bin/mt-tablet-test --gtest_filter=MultiThreadedTabletTest/1.DeleteAndReinsert
1000 times in an ASAN build. Before the patch, it failed 2/1000 with
a use-after-free on the bounds Slice. With the patch, it passed
1000/1000.

Change-Id: Ife1adaa3125642fc96364be69f42989800241256
Reviewed-on: http://gerrit.cloudera.org:8080/2731
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/1cda5dcd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/1cda5dcd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/1cda5dcd

Branch: refs/heads/master
Commit: 1cda5dcd0bc4a9fd41ffd4d13b60c473c04a9ff5
Parents: 3339ce0
Author: Todd Lipcon <to...@apache.org>
Authored: Thu Apr 7 14:59:32 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Fri Apr 8 06:05:59 2016 +0000

----------------------------------------------------------------------
 src/kudu/tablet/cfile_set.cc        |  8 +++---
 src/kudu/tablet/cfile_set.h         |  4 +--
 src/kudu/tablet/diskrowset.cc       |  4 +--
 src/kudu/tablet/diskrowset.h        |  4 +--
 src/kudu/tablet/memrowset.cc        |  4 +--
 src/kudu/tablet/memrowset.h         |  4 +--
 src/kudu/tablet/mock-rowsets.h      | 12 ++++-----
 src/kudu/tablet/rowset.cc           |  6 ++---
 src/kudu/tablet/rowset.h            | 12 ++++-----
 src/kudu/tablet/rowset_info.cc      | 43 +++++++++++++-------------------
 src/kudu/tablet/rowset_info.h       | 12 +++++++++
 src/kudu/tablet/rowset_tree-test.cc |  8 +++---
 src/kudu/tablet/rowset_tree.cc      |  6 ++---
 13 files changed, 63 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/cfile_set.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/cfile_set.cc b/src/kudu/tablet/cfile_set.cc
index 981e8b4..b65d20d 100644
--- a/src/kudu/tablet/cfile_set.cc
+++ b/src/kudu/tablet/cfile_set.cc
@@ -178,10 +178,10 @@ Status CFileSet::CountRows(rowid_t *count) const {
   return key_index_reader()->CountRows(count);
 }
 
-Status CFileSet::GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const {
-  *min_encoded_key = Slice(min_encoded_key_);
-  *max_encoded_key = Slice(max_encoded_key_);
+Status CFileSet::GetBounds(string* min_encoded_key,
+                           string* max_encoded_key) const {
+  *min_encoded_key = min_encoded_key_;
+  *max_encoded_key = max_encoded_key_;
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/cfile_set.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/cfile_set.h b/src/kudu/tablet/cfile_set.h
index 5e00905..9979069 100644
--- a/src/kudu/tablet/cfile_set.h
+++ b/src/kudu/tablet/cfile_set.h
@@ -68,8 +68,8 @@ class CFileSet : public std::enable_shared_from_this<CFileSet> {
   Status CountRows(rowid_t *count) const;
 
   // See RowSet::GetBounds
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const;
+  virtual Status GetBounds(std::string* min_encoded_key,
+                           std::string* max_encoded_key) const;
 
   uint64_t EstimateOnDiskSize() const;
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/diskrowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset.cc b/src/kudu/tablet/diskrowset.cc
index 875f2a7..a933083 100644
--- a/src/kudu/tablet/diskrowset.cc
+++ b/src/kudu/tablet/diskrowset.cc
@@ -644,8 +644,8 @@ Status DiskRowSet::CountRows(rowid_t *count) const {
   return base_data_->CountRows(count);
 }
 
-Status DiskRowSet::GetBounds(Slice *min_encoded_key,
-                             Slice *max_encoded_key) const {
+Status DiskRowSet::GetBounds(std::string* min_encoded_key,
+                             std::string* max_encoded_key) const {
   DCHECK(open_);
   boost::shared_lock<rw_spinlock> lock(component_lock_.get_lock());
   return base_data_->GetBounds(min_encoded_key, max_encoded_key);

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/diskrowset.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset.h b/src/kudu/tablet/diskrowset.h
index 0bc0430..f82eb11 100644
--- a/src/kudu/tablet/diskrowset.h
+++ b/src/kudu/tablet/diskrowset.h
@@ -322,8 +322,8 @@ class DiskRowSet : public RowSet {
   Status CountRows(rowid_t *count) const OVERRIDE;
 
   // See RowSet::GetBounds(...)
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const OVERRIDE;
+  virtual Status GetBounds(std::string* min_encoded_key,
+                           std::string* max_encoded_key) const OVERRIDE;
 
   // Estimate the number of bytes on-disk for the base data.
   uint64_t EstimateBaseDataDiskSize() const;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/memrowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/memrowset.cc b/src/kudu/tablet/memrowset.cc
index 5b18196..ee49e3b 100644
--- a/src/kudu/tablet/memrowset.cc
+++ b/src/kudu/tablet/memrowset.cc
@@ -298,8 +298,8 @@ Status MemRowSet::NewCompactionInput(const Schema* projection,
   return Status::OK();
 }
 
-Status MemRowSet::GetBounds(Slice *min_encoded_key,
-                            Slice *max_encoded_key) const {
+Status MemRowSet::GetBounds(string *min_encoded_key,
+                            string *max_encoded_key) const {
   return Status::NotSupported("");
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/memrowset.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/memrowset.h b/src/kudu/tablet/memrowset.h
index dee7db2..d494c1c 100644
--- a/src/kudu/tablet/memrowset.h
+++ b/src/kudu/tablet/memrowset.h
@@ -220,8 +220,8 @@ class MemRowSet : public RowSet,
     return Status::OK();
   }
 
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const OVERRIDE;
+  virtual Status GetBounds(std::string *min_encoded_key,
+                           std::string *max_encoded_key) const OVERRIDE;
 
   uint64_t EstimateOnDiskSize() const OVERRIDE {
     return 0;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/mock-rowsets.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/mock-rowsets.h b/src/kudu/tablet/mock-rowsets.h
index ed050b6..bccdf67 100644
--- a/src/kudu/tablet/mock-rowsets.h
+++ b/src/kudu/tablet/mock-rowsets.h
@@ -130,10 +130,10 @@ class MockDiskRowSet : public MockRowSet {
         last_key_(std::move(last_key)),
         size_(size) {}
 
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const OVERRIDE {
-    *min_encoded_key = Slice(first_key_);
-    *max_encoded_key = Slice(last_key_);
+  virtual Status GetBounds(std::string* min_encoded_key,
+                           std::string* max_encoded_key) const OVERRIDE {
+    *min_encoded_key = first_key_;
+    *max_encoded_key = last_key_;
     return Status::OK();
   }
 
@@ -156,8 +156,8 @@ class MockDiskRowSet : public MockRowSet {
 // Mock which acts like a MemRowSet and has no known bounds.
 class MockMemRowSet : public MockRowSet {
  public:
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const OVERRIDE {
+  virtual Status GetBounds(std::string* min_encoded_key,
+                           std::string* max_encoded_key) const OVERRIDE {
     return Status::NotSupported("");
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/rowset.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset.cc b/src/kudu/tablet/rowset.cc
index 247393c..71a686d 100644
--- a/src/kudu/tablet/rowset.cc
+++ b/src/kudu/tablet/rowset.cc
@@ -186,13 +186,13 @@ Status DuplicatingRowSet::CountRows(rowid_t *count) const {
   return Status::OK();
 }
 
-Status DuplicatingRowSet::GetBounds(Slice *min_encoded_key,
-                                    Slice *max_encoded_key) const {
+Status DuplicatingRowSet::GetBounds(string* min_encoded_key,
+                                    string* max_encoded_key) const {
   // The range out of the output rowset always spans the full range
   // of the input rowsets, since no new rows can be inserted.
   // The output rowsets are in ascending order, so their total range
   // spans the range [front().min, back().max].
-  Slice junk;
+  string junk;
   RETURN_NOT_OK(new_rowsets_.front()->GetBounds(min_encoded_key, &junk));
   RETURN_NOT_OK(new_rowsets_.back()->GetBounds(&junk, max_encoded_key));
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/rowset.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset.h b/src/kudu/tablet/rowset.h
index 3a301d8..4890410 100644
--- a/src/kudu/tablet/rowset.h
+++ b/src/kudu/tablet/rowset.h
@@ -99,14 +99,12 @@ class RowSet {
   virtual Status CountRows(rowid_t *count) const = 0;
 
   // Return the bounds for this RowSet. 'min_encoded_key' and 'max_encoded_key'
-  // are set to the first and last encoded keys for this RowSet. The storage
-  // for these slices is part of the RowSet and only guaranteed to stay valid
-  // until the RowSet is destroyed.
+  // are set to the first and last encoded keys for this RowSet.
   //
   // In the case that the rowset is still mutable (eg MemRowSet), this may
   // return Status::NotImplemented.
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const = 0;
+  virtual Status GetBounds(std::string* min_encoded_key,
+                           std::string* max_encoded_key) const = 0;
 
   // Return a displayable string for this rowset.
   virtual string ToString() const = 0;
@@ -273,8 +271,8 @@ class DuplicatingRowSet : public RowSet {
 
   Status CountRows(rowid_t *count) const OVERRIDE;
 
-  virtual Status GetBounds(Slice *min_encoded_key,
-                           Slice *max_encoded_key) const OVERRIDE;
+  virtual Status GetBounds(std::string* min_encoded_key,
+                           std::string* max_encoded_key) const OVERRIDE;
 
   uint64_t EstimateOnDiskSize() const OVERRIDE;
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/rowset_info.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc
index ddbf12b..8ed1028 100644
--- a/src/kudu/tablet/rowset_info.cc
+++ b/src/kudu/tablet/rowset_info.cc
@@ -47,20 +47,14 @@ namespace tablet {
 
 namespace {
 
-// Less-than comparison by minimum key (both by actual key slice and cdf)
+// Less-than comparison by minimum key (both by actual encoded key and cdf)
 bool LessCDFAndRSMin(const RowSetInfo& a, const RowSetInfo& b) {
-  Slice amin, bmin, max;
-  a.rowset()->GetBounds(&amin, &max);
-  b.rowset()->GetBounds(&bmin, &max);
-  return a.cdf_min_key() < b.cdf_min_key() && amin.compare(bmin) < 0;
+  return a.cdf_min_key() < b.cdf_min_key() && a.min_key().compare(b.min_key()) < 0;
 }
 
 // Less-than comparison by maximum key (both by actual key slice and cdf)
 bool LessCDFAndRSMax(const RowSetInfo& a, const RowSetInfo& b) {
-  Slice amax, bmax, min;
-  a.rowset()->GetBounds(&min, &amax);
-  b.rowset()->GetBounds(&min, &bmax);
-  return a.cdf_max_key() < b.cdf_max_key() && amax.compare(bmax) < 0;
+  return a.cdf_max_key() < b.cdf_max_key() && a.max_key().compare(b.max_key()) < 0;
 }
 
 // Debug-checks that min <= imin <= imax <= max
@@ -106,12 +100,13 @@ uint64_t SliceTailToInt(const Slice& slice, int start) {
 
 // Finds fraction (imin, imax) takes up of rs->GetBounds().
 // Requires that (imin, imax) is contained in rs->GetBounds().
-double StringFractionInRange(const RowSet* rs,
+double StringFractionInRange(const RowSetInfo* rsi,
                              const Slice& imin,
                              const Slice& imax) {
-  Slice min, max;
-  if (!rs->GetBounds(&min, &max).ok()) {
-    VLOG(2) << "Ignoring " << rs->ToString() << " in CDF calculation";
+  Slice min(rsi->min_key());
+  Slice max(rsi->max_key());
+  if (!rsi->has_bounds()) {
+    VLOG(2) << "Ignoring " << rsi->rowset()->ToString() << " in CDF calculation";
     return 0;
   }
   DCheckInside(min, max, imin, imax);
@@ -130,9 +125,6 @@ double StringFractionInRange(const RowSet* rs,
   return static_cast<double>(imax_int - imin_int) / (max_int - min_int);
 }
 
-// Typedef needed to use boost foreach macro
-typedef unordered_map<RowSet*, RowSetInfo*>::value_type RowSetRowSetInfoPair;
-
 // Computes the "width" of an interval [prev, next] according to the amount
 // of data estimated to be inside the interval, where this is calculated by
 // multiplying the fraction that the interval takes up in the keyspace of
@@ -143,10 +135,9 @@ double WidthByDataSize(const Slice& prev, const Slice& next,
                        const unordered_map<RowSet*, RowSetInfo*>& active) {
   double weight = 0;
 
-  for (const RowSetRowSetInfoPair& rsi : active) {
-    RowSet* rs = rsi.first;
-    double fraction = StringFractionInRange(rs, prev, next);
-    weight += rs->EstimateOnDiskSize() * fraction;
+  for (const auto& rs_rsi : active) {
+    double fraction = StringFractionInRange(rs_rsi.second, prev, next);
+    weight += rs_rsi.first->EstimateOnDiskSize() * fraction;
   }
 
   return weight;
@@ -223,8 +214,8 @@ void RowSetInfo::CollectOrdered(const RowSetTree& tree,
     double interval_width = WidthByDataSize(prev, next, active);
 
     // Increment active rowsets in min_key by the interval_width.
-    for (const RowSetRowSetInfoPair& rsi : active) {
-      RowSetInfo& cdf_rs = *rsi.second;
+    for (const auto& rs_rsi : active) {
+      RowSetInfo& cdf_rs = *rs_rsi.second;
       cdf_rs.cdf_max_key_ += interval_width;
     }
 
@@ -266,6 +257,7 @@ RowSetInfo::RowSetInfo(RowSet* rs, double init_cdf)
                       kMinSizeMb)),
     cdf_min_key_(init_cdf),
     cdf_max_key_(init_cdf) {
+  has_bounds_ = rs->GetBounds(&min_key_, &max_key_).ok();
 }
 
 void RowSetInfo::FinalizeCDFVector(vector<RowSetInfo>* vec,
@@ -288,10 +280,9 @@ string RowSetInfo::ToString() const {
   ret.append(rowset_->ToString());
   StringAppendF(&ret, "(% 3dM) [%.04f, %.04f]", size_mb_,
                 cdf_min_key_, cdf_max_key_);
-  Slice min, max;
-  if (rowset_->GetBounds(&min, &max).ok()) {
-    ret.append(" [").append(min.ToDebugString());
-    ret.append(",").append(max.ToDebugString());
+  if (has_bounds_) {
+    ret.append(" [").append(Slice(min_key_).ToDebugString());
+    ret.append(",").append(Slice(max_key_).ToDebugString());
     ret.append("]");
   }
   return ret;

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/rowset_info.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index df3a63a..cb315dc 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -48,6 +48,10 @@ class RowSetInfo {
   // Return the value of the CDF at the maximum key of this candidate.
   double cdf_max_key() const { return cdf_max_key_; }
 
+  bool has_bounds() const { return has_bounds_; }
+  const std::string& min_key() const { return min_key_; }
+  const std::string& max_key() const { return max_key_; }
+
   // Return the "width" of the candidate rowset.
   //
   // This is an estimate of the percentage of the tablet data which
@@ -78,6 +82,14 @@ class RowSetInfo {
 
   RowSet* rowset_;
   int size_mb_;
+
+  // True if the RowSet has known bounds.
+  // MemRowSets in particular do not.
+  bool has_bounds_;
+
+  // The bounds, if known.
+  std::string min_key_, max_key_;
+
   double cdf_min_key_, cdf_max_key_;
   double density_;
 };

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/rowset_tree-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree-test.cc b/src/kudu/tablet/rowset_tree-test.cc
index e3488be..c3efea4 100644
--- a/src/kudu/tablet/rowset_tree-test.cc
+++ b/src/kudu/tablet/rowset_tree-test.cc
@@ -159,16 +159,14 @@ TEST_F(TestRowSetTree, TestEndpointsConsistency) {
       ASSERT_LE(prev.compare(slice), 0);
     }
 
-    Slice min, max;
+    string min, max;
     ASSERT_OK(rs->GetBounds(&min, &max));
     if (ept == RowSetTree::START) {
-      ASSERT_EQ(min.data(), slice.data());
-      ASSERT_EQ(min.size(), slice.size());
+      ASSERT_EQ(min, slice.ToString());
       ASSERT_TRUE(InsertIfNotPresent(&open, rs));
       ASSERT_TRUE(InsertIfNotPresent(&visited, rs));
     } else if (ept == RowSetTree::STOP) {
-      ASSERT_EQ(max.data(), slice.data());
-      ASSERT_EQ(max.size(), slice.size());
+      ASSERT_EQ(max, slice.ToString());
       ASSERT_TRUE(open.erase(rs) == 1);
     } else {
       FAIL() << "No such endpoint type exists";

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/1cda5dcd/src/kudu/tablet/rowset_tree.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree.cc b/src/kudu/tablet/rowset_tree.cc
index b4f114c..15fbd23 100644
--- a/src/kudu/tablet/rowset_tree.cc
+++ b/src/kudu/tablet/rowset_tree.cc
@@ -94,7 +94,7 @@ Status RowSetTree::Reset(const RowSetVector &rowsets) {
   for (const shared_ptr<RowSet> &rs : rowsets) {
     gscoped_ptr<RowSetWithBounds> rsit(new RowSetWithBounds());
     rsit->rowset = rs.get();
-    Slice min_key, max_key;
+    string min_key, max_key;
     Status s = rs->GetBounds(&min_key, &max_key);
     if (s.IsNotSupported()) {
       // This rowset is a MemRowSet, for which the bounds change as more
@@ -116,8 +116,8 @@ Status RowSetTree::Reset(const RowSetVector &rowsets) {
     endpoints.push_back(RSEndpoint(rsit->rowset, STOP, max_key));
 
     // Load bounds and save entry
-    rsit->min_key = min_key.ToString();
-    rsit->max_key = max_key.ToString();
+    rsit->min_key = min_key;
+    rsit->max_key = max_key;
     entries.push_back(rsit.release());
   }