You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2018/12/21 22:44:13 UTC

kudu git commit: tablet: add support for diff scan iterator

Repository: kudu
Updated Branches:
  refs/heads/master 8525a9336 -> 236fdfe83


tablet: add support for diff scan iterator

This commit adds plumbing to support creating an iterator at the Tablet
level that supports snap_to_exclude and include_deleted_rows.

Also added a test that validates that ghost rows are returned. This test
should be updated once KUDU-2645 has been implemented, which should
allow the de-duplication of such ghost rows.

Change-Id: I9aa0ce2276bdd37688e7a91a2efcf49a8f802eb5
Reviewed-on: http://gerrit.cloudera.org:8080/12109
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/236fdfe8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/236fdfe8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/236fdfe8

Branch: refs/heads/master
Commit: 236fdfe83ded3098d16380c9e11f65f93f25144f
Parents: 8525a93
Author: Mike Percy <mp...@apache.org>
Authored: Tue Dec 11 18:49:26 2018 -0800
Committer: Mike Percy <mp...@apache.org>
Committed: Fri Dec 21 22:43:56 2018 +0000

----------------------------------------------------------------------
 src/kudu/tablet/CMakeLists.txt                 |   1 +
 src/kudu/tablet/diff_scan-test.cc              | 112 ++++++++++++++++++++
 src/kudu/tablet/diskrowset-test.cc             |   5 +-
 src/kudu/tablet/major_delta_compaction-test.cc |   5 +-
 src/kudu/tablet/tablet-test-base.h             |   5 +-
 src/kudu/tablet/tablet-test-util.h             |  38 ++++---
 src/kudu/tablet/tablet-test.cc                 |   7 +-
 src/kudu/tablet/tablet.cc                      |  59 +++++------
 src/kudu/tablet/tablet.h                       |  57 ++++++----
 src/kudu/tablet/tablet_bootstrap-test.cc       |   7 +-
 src/kudu/tserver/tablet_service.cc             |   7 +-
 11 files changed, 221 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/CMakeLists.txt b/src/kudu/tablet/CMakeLists.txt
index 8b048ee..ce5fc0b 100644
--- a/src/kudu/tablet/CMakeLists.txt
+++ b/src/kudu/tablet/CMakeLists.txt
@@ -99,6 +99,7 @@ ADD_KUDU_TEST(composite-pushdown-test)
 ADD_KUDU_TEST(delta_compaction-test)
 ADD_KUDU_TEST(deltafile-test)
 ADD_KUDU_TEST(deltamemstore-test)
+ADD_KUDU_TEST(diff_scan-test)
 ADD_KUDU_TEST(diskrowset-test)
 ADD_KUDU_TEST(lock_manager-test)
 ADD_KUDU_TEST(major_delta_compaction-test)

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/diff_scan-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diff_scan-test.cc b/src/kudu/tablet/diff_scan-test.cc
new file mode 100644
index 0000000..01bf63f
--- /dev/null
+++ b/src/kudu/tablet/diff_scan-test.cc
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "kudu/common/common.pb.h"
+#include "kudu/common/iterator.h"
+#include "kudu/common/scan_spec.h"
+#include "kudu/common/schema.h"
+#include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/tablet/local_tablet_writer.h"
+#include "kudu/tablet/mvcc.h"
+#include "kudu/tablet/rowset.h"
+#include "kudu/tablet/tablet-harness.h"
+#include "kudu/tablet/tablet-test-base.h"
+#include "kudu/tablet/tablet-test-util.h"
+#include "kudu/tablet/tablet.h"
+#include "kudu/util/test_macros.h"
+
+using std::string;
+using std::vector;
+
+namespace kudu {
+namespace tablet {
+
+class DiffScanTest : public TabletTestBase<IntKeyTestSetup<INT64>> {
+ public:
+  DiffScanTest()
+      : Superclass(TabletHarness::Options::ClockType::HYBRID_CLOCK) {}
+
+ private:
+  using Superclass = TabletTestBase<IntKeyTestSetup<INT64>>;
+};
+
+TEST_F(DiffScanTest, TestDiffScan) {
+  auto tablet = this->tablet();
+  auto tablet_id = tablet->tablet_id();
+
+  MvccSnapshot snap1;
+  tablet->mvcc_manager()->TakeSnapshot(&snap1);
+
+  LocalTabletWriter writer(tablet.get(), &client_schema_);
+  constexpr int64_t kRowKey = 1;
+  ASSERT_OK(InsertTestRow(&writer, kRowKey, 1));
+  ASSERT_OK(tablet->Flush());
+
+  // 2. Delete the row and flush the DMS.
+  ASSERT_OK(DeleteTestRow(&writer, kRowKey));
+  ASSERT_OK(tablet->FlushAllDMSForTests());
+
+  // 3. Insert the same row key (with another value) and flush the MRS.
+  ASSERT_OK(InsertTestRow(&writer, kRowKey, 2));
+  ASSERT_OK(tablet->Flush());
+
+  // Ensure there is only 1 live row in the tablet (our reinsert).
+  vector<string> rows;
+  ASSERT_OK(DumpTablet(*tablet, tablet->schema()->CopyWithoutColumnIds(), &rows));
+  ASSERT_EQ(1, rows.size()) << "expected only one live row";
+  ASSERT_EQ("(int64 key=1, int32 key_idx=1, int32 val=2)", rows[0]);
+
+  // 4. Do a diff scan from time snap1.
+  ASSERT_OK(tablet->mvcc_manager()->WaitForApplyingTransactionsToCommit());
+  MvccSnapshot snap2;
+  tablet->mvcc_manager()->TakeSnapshot(&snap2);
+
+  RowIteratorOptions opts;
+  opts.snap_to_include = snap2;
+  opts.order = ORDERED;
+  opts.include_deleted_rows = true;
+
+  auto projection = tablet->schema()->CopyWithoutColumnIds();
+  opts.projection = &projection;
+
+  gscoped_ptr<RowwiseIterator> row_iterator;
+  ASSERT_OK(tablet->NewRowIterator(std::move(opts),
+                                   &row_iterator));
+  ASSERT_TRUE(row_iterator);
+  ScanSpec spec;
+  ASSERT_OK(row_iterator->Init(&spec));
+
+  // For the time being, we should get two rows back, and they should both be
+  // the same key. In reality, one has been deleted.
+  // TODO(KUDU-2645): The result of this test should change once we properly
+  // implement diff scans and the merge iterator is able to deduplicate ghosts.
+  ASSERT_OK(tablet::IterateToStringList(row_iterator.get(), &rows));
+  ASSERT_EQ(2, rows.size());
+  EXPECT_EQ("(int64 key=1, int32 key_idx=1, int32 val=1)", rows[0]);
+  EXPECT_EQ("(int64 key=1, int32 key_idx=1, int32 val=2)", rows[1]);
+}
+
+} // namespace tablet
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/diskrowset-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/diskrowset-test.cc b/src/kudu/tablet/diskrowset-test.cc
index 3889162..b10c277 100644
--- a/src/kudu/tablet/diskrowset-test.cc
+++ b/src/kudu/tablet/diskrowset-test.cc
@@ -23,7 +23,6 @@
 #include <memory>
 #include <ostream>
 #include <string>
-#include <type_traits>
 #include <unordered_set>
 #include <vector>
 
@@ -441,7 +440,7 @@ TEST_F(TestRowSet, TestFlushedUpdatesRespectMVCC) {
     opts.snap_to_include = snaps[i];
     gscoped_ptr<RowwiseIterator> iter;
     ASSERT_OK(rs->NewRowIterator(opts, &iter));
-    string data = InitAndDumpIterator(std::move(iter));
+    string data = InitAndDumpIterator(iter.get());
     EXPECT_EQ(StringPrintf(R"((string key="row", uint32 val=%d))", i + 1), data);
   }
 
@@ -456,7 +455,7 @@ TEST_F(TestRowSet, TestFlushedUpdatesRespectMVCC) {
     opts.snap_to_include = snaps[i];
     gscoped_ptr<RowwiseIterator> iter;
     ASSERT_OK(rs->NewRowIterator(opts, &iter));
-    string data = InitAndDumpIterator(std::move(iter));
+    string data = InitAndDumpIterator(iter.get());
     EXPECT_EQ(StringPrintf(R"((string key="row", uint32 val=%d))", i + 1), data);
   }
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/major_delta_compaction-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/major_delta_compaction-test.cc b/src/kudu/tablet/major_delta_compaction-test.cc
index e97d6e7..84671a2 100644
--- a/src/kudu/tablet/major_delta_compaction-test.cc
+++ b/src/kudu/tablet/major_delta_compaction-test.cc
@@ -153,8 +153,11 @@ class TestMajorDeltaCompaction : public KuduRowSetTest {
 
   void VerifyDataWithMvccAndExpectedState(const MvccSnapshot& snap,
                                           const vector<ExpectedRow>& passed_expected_state) {
+      RowIteratorOptions opts;
+      opts.projection = &client_schema_;
+      opts.snap_to_include = snap;
       gscoped_ptr<RowwiseIterator> row_iter;
-      ASSERT_OK(tablet()->NewRowIterator(client_schema_, snap, UNORDERED, &row_iter));
+      ASSERT_OK(tablet()->NewRowIterator(std::move(opts), &row_iter));
       ASSERT_OK(row_iter->Init(nullptr));
 
       vector<string> results;

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/tablet-test-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-test-base.h b/src/kudu/tablet/tablet-test-base.h
index 4a57060..3386f56 100644
--- a/src/kudu/tablet/tablet-test-base.h
+++ b/src/kudu/tablet/tablet-test-base.h
@@ -419,8 +419,11 @@ class TabletTestBase : public KuduTabletTest {
   void VerifyTestRowsWithTimestampAndVerifier(int32_t first_row, uint64_t expected_count,
                                               Timestamp timestamp,
                                               const boost::optional<TestRowVerifier>& verifier) {
+    RowIteratorOptions opts;
+    opts.projection = &client_schema_;
+    opts.snap_to_include = MvccSnapshot(timestamp);
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(tablet()->NewRowIterator(client_schema_, MvccSnapshot(timestamp), UNORDERED, &iter));
+    ASSERT_OK(tablet()->NewRowIterator(std::move(opts), &iter));
     VerifyTestRowsWithRowIteratorAndVerifier(first_row, expected_count, std::move(iter), verifier);
   }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/tablet-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-test-util.h b/src/kudu/tablet/tablet-test-util.h
index a418941..b5947ea 100644
--- a/src/kudu/tablet/tablet-test-util.h
+++ b/src/kudu/tablet/tablet-test-util.h
@@ -112,7 +112,7 @@ class KuduTabletTest : public KuduTest {
     TabletHarness::Options opts(dir);
     opts.enable_metrics = true;
     opts.clock_type = clock_type_;
-    bool first_time = harness_ == NULL;
+    bool first_time = harness_ == nullptr;
     harness_.reset(new TabletHarness(schema_, opts));
     CHECK_OK(harness_->Create(first_time));
   }
@@ -146,7 +146,7 @@ class KuduTabletTest : public KuduTest {
     tserver::AlterSchemaRequestPB req;
     req.set_schema_version(tablet()->metadata()->schema_version() + 1);
 
-    AlterSchemaTransactionState tx_state(NULL, &req, NULL);
+    AlterSchemaTransactionState tx_state(nullptr, &req, nullptr);
     ASSERT_OK(tablet()->CreatePreparedAlterSchema(&tx_state, &schema));
     ASSERT_OK(tablet()->AlterSchema(&tx_state));
     tx_state.Finish();
@@ -239,9 +239,12 @@ static inline void CollectRowsForSnapshots(
     std::vector<std::vector<std::string>* >* collected_rows) {
   for (const MvccSnapshot& snapshot : snaps) {
     DVLOG(1) << "Snapshot: " <<  snapshot.ToString();
+    RowIteratorOptions opts;
+    opts.projection = &schema;
+    opts.snap_to_include = snapshot;
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(tablet->NewRowIterator(schema, snapshot, UNORDERED, &iter));
-    ASSERT_OK(iter->Init(NULL));
+    ASSERT_OK(tablet->NewRowIterator(std::move(opts), &iter));
+    ASSERT_OK(iter->Init(nullptr));
     auto collector = new std::vector<std::string>();
     ASSERT_OK(IterateToStringList(iter.get(), collector));
     for (const auto& mrs : *collector) {
@@ -262,12 +265,13 @@ static inline void VerifySnapshotsHaveSameResult(
   // Now iterate again and make sure we get the same thing.
   for (const MvccSnapshot& snapshot : snaps) {
     DVLOG(1) << "Snapshot: " <<  snapshot.ToString();
+
+    RowIteratorOptions opts;
+    opts.projection = &schema;
+    opts.snap_to_include = snapshot;
     gscoped_ptr<RowwiseIterator> iter;
-    ASSERT_OK(tablet->NewRowIterator(schema,
-                                            snapshot,
-                                            UNORDERED,
-                                            &iter));
-    ASSERT_OK(iter->Init(NULL));
+    ASSERT_OK(tablet->NewRowIterator(std::move(opts), &iter));
+    ASSERT_OK(iter->Init(nullptr));
     std::vector<std::string> collector;
     ASSERT_OK(IterateToStringList(iter.get(), &collector));
     ASSERT_EQ(collector.size(), expected_rows[idx]->size());
@@ -296,21 +300,21 @@ static inline Status DumpRowSet(const RowSet& rs,
 
 // Take an un-initialized iterator, Init() it, and iterate through all of its rows.
 // The resulting string contains a line per entry.
-static inline std::string InitAndDumpIterator(gscoped_ptr<RowwiseIterator> iter) {
-  CHECK_OK(iter->Init(NULL));
+static inline std::string InitAndDumpIterator(RowwiseIterator* iter) {
+  CHECK_OK(iter->Init(nullptr));
 
   std::vector<std::string> out;
-  CHECK_OK(IterateToStringList(iter.get(), &out));
+  CHECK_OK(IterateToStringList(iter, &out));
   return JoinStrings(out, "\n");
 }
 
 // Dump all of the rows of the tablet into the given vector.
 static inline Status DumpTablet(const Tablet& tablet,
-                         const Schema& projection,
-                         std::vector<std::string>* out) {
+                                const Schema& projection,
+                                std::vector<std::string>* out) {
   gscoped_ptr<RowwiseIterator> iter;
   RETURN_NOT_OK(tablet.NewRowIterator(projection, &iter));
-  RETURN_NOT_OK(iter->Init(NULL));
+  RETURN_NOT_OK(iter->Init(nullptr));
   std::vector<std::string> rows;
   RETURN_NOT_OK(IterateToStringList(iter.get(), &rows));
   std::sort(rows.begin(), rows.end());
@@ -325,10 +329,10 @@ static Status WriteRow(const Slice &row_slice, RowSetWriterClass *writer) {
   const Schema &schema = writer->schema();
   DCHECK_EQ(row_slice.size(), schema.byte_size());
 
-  RowBlock block(schema, 1, NULL);
+  RowBlock block(schema, 1, nullptr);
   ConstContiguousRow row(&schema, row_slice.data());
   RowBlockRow dst_row = block.row(0);
-  RETURN_NOT_OK(CopyRow(row, &dst_row, reinterpret_cast<Arena*>(NULL)));
+  RETURN_NOT_OK(CopyRow(row, &dst_row, static_cast<Arena*>(nullptr)));
 
   return writer->AppendBlock(block);
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/tablet-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-test.cc b/src/kudu/tablet/tablet-test.cc
index e3fac2d..c9da697 100644
--- a/src/kudu/tablet/tablet-test.cc
+++ b/src/kudu/tablet/tablet-test.cc
@@ -553,9 +553,12 @@ TYPED_TEST(TestTablet, TestRowIteratorOrdered) {
     for (int numBlocks = 1; numBlocks < 5; numBlocks*=2) {
       const int rowsPerBlock = kNumRows / numBlocks;
       // Make a new ordered iterator for the current snapshot.
+      RowIteratorOptions opts;
+      opts.projection = &this->client_schema_;
+      opts.snap_to_include = snap;
+      opts.order = ORDERED;
       gscoped_ptr<RowwiseIterator> iter;
-
-      ASSERT_OK(this->tablet()->NewRowIterator(this->client_schema_, snap, ORDERED, &iter));
+      ASSERT_OK(this->tablet()->NewRowIterator(std::move(opts), &iter));
       ASSERT_OK(iter->Init(nullptr));
 
       // Iterate the tablet collecting rows.

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index 603a241..0d36f4c 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -400,25 +400,26 @@ void Tablet::SplitKeyRange(const EncodedKey* start_key,
                             column_ids, target_chunk_size, key_range_info);
 }
 
-Status Tablet::NewRowIterator(const Schema &projection,
-                              gscoped_ptr<RowwiseIterator> *iter) const {
+Status Tablet::NewRowIterator(const Schema& projection,
+                              gscoped_ptr<RowwiseIterator>* iter) const {
+  RowIteratorOptions opts;
   // Yield current rows.
-  MvccSnapshot snap(mvcc_);
-  return NewRowIterator(projection, snap, UNORDERED, iter);
+  opts.snap_to_include = MvccSnapshot(mvcc_);
+  opts.projection = &projection;
+  return NewRowIterator(std::move(opts), iter);
 }
 
-
-Status Tablet::NewRowIterator(const Schema &projection,
-                              const MvccSnapshot &snap,
-                              const OrderMode order,
-                              gscoped_ptr<RowwiseIterator> *iter) const {
+Status Tablet::NewRowIterator(RowIteratorOptions opts,
+                              gscoped_ptr<RowwiseIterator>* iter) const {
   RETURN_IF_STOPPED_OR_CHECK_STATE(kOpen);
   if (metrics_) {
     metrics_->scans_started->Increment();
   }
-  IOContext io_context({ tablet_id() });
-  VLOG_WITH_PREFIX(2) << "Created new Iterator under snap: " << snap.ToString();
-  iter->reset(new Iterator(this, projection, snap, order, std::move(io_context)));
+
+  VLOG_WITH_PREFIX(2) << "Created new Iterator for snapshot range: ("
+                      << (opts.snap_to_exclude ? opts.snap_to_exclude->ToString() : "-Inf")
+                      << ", " << opts.snap_to_include.ToString() << ")";
+  iter->reset(new Iterator(this, std::move(opts)));
   return Status::OK();
 }
 
@@ -1812,11 +1813,8 @@ Status Tablet::DebugDump(vector<string> *lines) {
 }
 
 Status Tablet::CaptureConsistentIterators(
-    const Schema* projection,
-    const MvccSnapshot& snap,
+    const RowIteratorOptions& opts,
     const ScanSpec* spec,
-    OrderMode order,
-    const IOContext* io_context,
     vector<shared_ptr<RowwiseIterator>>* iters) const {
 
   shared_lock<rw_spinlock> l(component_lock_);
@@ -1826,17 +1824,12 @@ Status Tablet::CaptureConsistentIterators(
   // in the middle, we don't modify the output arguments.
   vector<shared_ptr<RowwiseIterator>> ret;
 
-  RowIteratorOptions opts;
-  opts.projection = projection;
-  opts.snap_to_include = snap;
-  opts.order = order;
 
   // Grab the memrowset iterator.
   gscoped_ptr<RowwiseIterator> ms_iter;
   RETURN_NOT_OK(components_->memrowset->NewRowIterator(opts, &ms_iter));
   ret.emplace_back(ms_iter.release());
 
-  opts.io_context = io_context;
 
   // Cull row-sets in the case of key-range queries.
   if (spec != nullptr && (spec->lower_bound_key() || spec->exclusive_upper_bound_key())) {
@@ -2432,14 +2425,15 @@ string Tablet::LogPrefix() const {
 // Tablet::Iterator
 ////////////////////////////////////////////////////////////
 
-Tablet::Iterator::Iterator(const Tablet* tablet, const Schema& projection,
-                           MvccSnapshot snap, OrderMode order,
-                           IOContext io_context)
+Tablet::Iterator::Iterator(const Tablet* tablet,
+                           RowIteratorOptions opts)
     : tablet_(tablet),
-      projection_(projection),
-      snap_(std::move(snap)),
-      order_(order),
-      io_context_(std::move(io_context)) {}
+      io_context_({ tablet->tablet_id() }),
+      projection_(*CHECK_NOTNULL(opts.projection)),
+      opts_(std::move(opts)) {
+  opts_.io_context = &io_context_;
+  opts_.projection = &projection_;
+}
 
 Tablet::Iterator::~Iterator() {}
 
@@ -2450,11 +2444,10 @@ Status Tablet::Iterator::Init(ScanSpec *spec) {
   RETURN_NOT_OK(tablet_->GetMappedReadProjection(projection_, &projection_));
 
   vector<shared_ptr<RowwiseIterator>> iters;
-  RETURN_NOT_OK(tablet_->CaptureConsistentIterators(&projection_, snap_, spec, order_,
-                                                    &io_context_, &iters));
+  RETURN_NOT_OK(tablet_->CaptureConsistentIterators(opts_, spec, &iters));
   TRACE_COUNTER_INCREMENT("rowset_iterators", iters.size());
 
-  switch (order_) {
+  switch (opts_.order) {
     case ORDERED:
       iter_.reset(new MergeIterator(projection_, std::move(iters)));
       break;
@@ -2489,6 +2482,10 @@ string Tablet::Iterator::ToString() const {
   return s;
 }
 
+const Schema& Tablet::Iterator::schema() const {
+  return *opts_.projection;
+}
+
 void Tablet::Iterator::GetIteratorStats(vector<IteratorStats>* stats) const {
   iter_->GetIteratorStats(stats);
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/tablet.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h
index b2262c5..4e3fadc 100644
--- a/src/kudu/tablet/tablet.h
+++ b/src/kudu/tablet/tablet.h
@@ -31,7 +31,6 @@
 #include <gtest/gtest_prod.h>
 
 #include "kudu/clock/clock.h"
-#include "kudu/common/common.pb.h"
 #include "kudu/common/iterator.h"
 #include "kudu/common/schema.h"
 #include "kudu/fs/io_context.h"
@@ -193,14 +192,22 @@ class Tablet {
   // Create a new row iterator which yields the rows as of the current MVCC
   // state of this tablet.
   // The returned iterator is not initialized.
-  Status NewRowIterator(const Schema &projection,
-                        gscoped_ptr<RowwiseIterator> *iter) const;
+  Status NewRowIterator(const Schema& projection,
+                        gscoped_ptr<RowwiseIterator>* iter) const;
 
-  // Create a new row iterator for some historical snapshot.
-  Status NewRowIterator(const Schema &projection,
-                        const MvccSnapshot &snap,
-                        const OrderMode order,
-                        gscoped_ptr<RowwiseIterator> *iter) const;
+  // Create a new row iterator using specific iterator options.
+  //
+  // 'opts' contains the options desired from the iterator.
+  //
+  // Note: the Schema pointed to by the 'projection' field of the 'opts' struct
+  // will be copied, so that pointer only needs to remain valid during the call
+  // to NewRowIterator() and not after that.
+  // Similarly, the 'io_context' field of the 'opts' struct will be ignored and
+  // overwritten in the copy of the 'opts' struct used by the returned iterator
+  // because the iterator constructs and holds the relevant instance of that
+  // object as a member variable.
+  Status NewRowIterator(RowIteratorOptions opts,
+                        gscoped_ptr<RowwiseIterator>* iter) const;
 
   // Flush the current MemRowSet for this tablet to disk. This swaps
   // in a new (initially empty) MemRowSet in its place.
@@ -571,12 +578,10 @@ class Tablet {
   // of creation, and potentially newer data.
   //
   // The returned iterators are not Init()ed.
-  // 'projection' must remain valid and unchanged for the lifetime of the returned iterators.
-  Status CaptureConsistentIterators(const Schema* projection,
-                                    const MvccSnapshot& snap,
+  // The pointer fields of 'opts' must remain valid and unchanged for the
+  // lifetime of the returned iterators.
+  Status CaptureConsistentIterators(const RowIteratorOptions& opts,
                                     const ScanSpec* spec,
-                                    OrderMode order,
-                                    const fs::IOContext* io_context,
                                     std::vector<std::shared_ptr<RowwiseIterator> >* iters) const;
 
   Status PickRowSetsToCompact(RowSetsInCompaction *picked,
@@ -778,9 +783,7 @@ class Tablet::Iterator : public RowwiseIterator {
 
   std::string ToString() const OVERRIDE;
 
-  const Schema &schema() const OVERRIDE {
-    return projection_;
-  }
+  const Schema &schema() const OVERRIDE;
 
   virtual void GetIteratorStats(std::vector<IteratorStats>* stats) const OVERRIDE;
 
@@ -789,14 +792,22 @@ class Tablet::Iterator : public RowwiseIterator {
 
   DISALLOW_COPY_AND_ASSIGN(Iterator);
 
-  Iterator(const Tablet* tablet, const Schema& projection, MvccSnapshot snap,
-           OrderMode order, fs::IOContext io_context);
-
-  const Tablet *tablet_;
+  // Instantiate iterator with given projection and options.
+  //
+  // Note: the Schema pointed to by the 'projection' field of the 'opts' struct
+  // will be copied into projection_, so that pointer only needs to remain
+  // valid during the call to the constructor and not after that.
+  // Similarly, the 'io_context' field of the 'opts' struct will be ignored and
+  // overwritten in the copy of the 'opts' struct used by this class because
+  // this class constructs and holds the relevant instance of that object as a
+  // member variable.
+  Iterator(const Tablet* tablet,
+           RowIteratorOptions opts);
+
+  const Tablet* tablet_;
+  fs::IOContext io_context_;
   Schema projection_;
-  const MvccSnapshot snap_;
-  const OrderMode order_;
-  const fs::IOContext io_context_;
+  RowIteratorOptions opts_;
   gscoped_ptr<RowwiseIterator> iter_;
 };
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tablet/tablet_bootstrap-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet_bootstrap-test.cc b/src/kudu/tablet/tablet_bootstrap-test.cc
index f5159d3..ea53e44 100644
--- a/src/kudu/tablet/tablet_bootstrap-test.cc
+++ b/src/kudu/tablet/tablet_bootstrap-test.cc
@@ -63,7 +63,7 @@
 #include "kudu/gutil/ref_counted.h"
 #include "kudu/rpc/result_tracker.h"
 #include "kudu/tablet/metadata.pb.h"
-#include "kudu/tablet/mvcc.h"
+#include "kudu/tablet/rowset.h"
 #include "kudu/tablet/rowset_metadata.h"
 #include "kudu/tablet/tablet-harness.h"
 #include "kudu/tablet/tablet-test-util.h"
@@ -203,8 +203,9 @@ class BootstrapTest : public LogTestBase {
     // Unless we explicitly scan at a snapshot including all timestamps, we don't
     // see the bootstrapped operation. This is likely due to KUDU-138 -- perhaps
     // we aren't properly setting up the clock after bootstrap.
-    MvccSnapshot snap = MvccSnapshot::CreateSnapshotIncludingAllTransactions();
-    ASSERT_OK(tablet->NewRowIterator(schema_, snap, UNORDERED, &iter));
+    RowIteratorOptions opts;
+    opts.projection = &schema_;
+    ASSERT_OK(tablet->NewRowIterator(std::move(opts), &iter));
     ASSERT_OK(iter->Init(nullptr));
     ASSERT_OK(IterateToStringList(iter.get(), results));
     for (const string& result : *results) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/236fdfe8/src/kudu/tserver/tablet_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index 9b3177e..0ec4e5f 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -69,6 +69,7 @@
 #include "kudu/tablet/compaction.h"
 #include "kudu/tablet/metadata.pb.h"
 #include "kudu/tablet/mvcc.h"
+#include "kudu/tablet/rowset.h"
 #include "kudu/tablet/tablet.h"
 #include "kudu/tablet/tablet_metadata.h"
 #include "kudu/tablet/tablet_metrics.h"
@@ -2291,7 +2292,11 @@ Status TabletServiceImpl::HandleScanAtSnapshot(const NewScanRequestPB& scan_pb,
   if (scan_pb.order_mode() == UNKNOWN_ORDER_MODE) {
     return Status::InvalidArgument("Unknown order mode specified");
   }
-  RETURN_NOT_OK(tablet->NewRowIterator(projection, snap, scan_pb.order_mode(), iter));
+  tablet::RowIteratorOptions opts;
+  opts.projection = &projection;
+  opts.snap_to_include = snap;
+  opts.order = scan_pb.order_mode();
+  RETURN_NOT_OK(tablet->NewRowIterator(std::move(opts), iter));
 
   // Return the picked snapshot timestamp for both READ_AT_SNAPSHOT
   // and READ_YOUR_WRITES mode.