You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2017/03/06 19:54:20 UTC

[1/2] kudu git commit: KUDU-1893 Ensure evaluation of added columns

Repository: kudu
Updated Branches:
  refs/heads/branch-1.2.x f5fa1827b -> 50039911b


KUDU-1893 Ensure evaluation of added columns

During a normal scan, a CFileIterator sets a flag to indicate whether
the block supports decoder-level evaluation, and if, after returning,
this flag has not been set to false, the returned data will be
evaluated against the predicate.

Columns that are added after table creation and after a flush will use
DefaultColumnValueIterators to scan. These iterators were not setting
the flag, so the predicates would not be evaluated on these columns.
All rows in the existing results set would be returned, regardless of
predicates on added columns.

If a column predicate is added to an added column that should filter
out some rows, the scan will yield incorrect results. There is added
test coverage in all_types-scan-correctness-test.cc. Without the
changes in this patch, RunTests() will fail at the following points:
* Null default, Range+IsNotNull
* Null default, Range
* Non-null default, Range+IsNotNull
* Non-null default, Range with Default
* Non-null default, Range without Default

This patch addresses this by forcing predicate evaluation by the
DefaultColumnValueIterator.

Change-Id: Ic10919962d11effbb1b66d97660acd012b6b4be9
Reviewed-on: http://gerrit.cloudera.org:8080/6225
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <dr...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b2ad6e28
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b2ad6e28
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b2ad6e28

Branch: refs/heads/branch-1.2.x
Commit: b2ad6e28aacd84c9b68e734e7bba92d6671eaa58
Parents: f5fa182
Author: Andrew Wong <aw...@cloudera.com>
Authored: Tue Feb 28 16:30:32 2017 -0800
Committer: David Ribeiro Alves <dr...@apache.org>
Committed: Fri Mar 3 21:52:31 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/cfile_reader.cc                  |  14 +
 src/kudu/common/column_predicate.cc             |  20 +-
 src/kudu/common/column_predicate.h              |   4 +
 src/kudu/common/partial_row.h                   |   7 +
 src/kudu/tablet/CMakeLists.txt                  |   1 +
 .../tablet/all_types-scan-correctness-test.cc   | 527 +++++++++++++++++++
 6 files changed, 572 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b2ad6e28/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index facf474..f307da9 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -487,7 +487,14 @@ Status DefaultColumnValueIterator::Scan(ColumnMaterializationContext* ctx) {
     ColumnDataView dst_view(dst);
     dst_view.SetNullBits(dst->nrows(), value_ != nullptr);
   }
+  // The vector can be cleared if there is a read_default and it does not satisfy the predicate.
   if (value_ != nullptr) {
+    if (ctx->DecoderEvalNotDisabled() &&
+        !ctx->pred()->EvaluateCell(typeinfo_->physical_type(), value_)) {
+      SelectionVectorView sel_view(ctx->sel());
+      sel_view.ClearBits(dst->nrows());
+      return Status::OK();
+    }
     if (typeinfo_->physical_type() == BINARY) {
       const Slice *src_slice = reinterpret_cast<const Slice *>(value_);
       Slice dst_slice;
@@ -502,6 +509,13 @@ Status DefaultColumnValueIterator::Scan(ColumnMaterializationContext* ctx) {
         dst->SetCellValue(i, value_);
       }
     }
+  } else {
+    // If the read_default is null and there is a predicate on this block, remove the rows in this
+    // block from the result set.
+    if (ctx->DecoderEvalNotDisabled()) {
+      SelectionVectorView sel_view(ctx->sel());
+      sel_view.ClearBits(dst->nrows());
+    }
   }
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2ad6e28/src/kudu/common/column_predicate.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.cc b/src/kudu/common/column_predicate.cc
index 0e8c959..fd2628b 100644
--- a/src/kudu/common/column_predicate.cc
+++ b/src/kudu/common/column_predicate.cc
@@ -519,6 +519,24 @@ void ColumnPredicate::EvaluateForPhysicalType(const ColumnBlock& block,
   LOG(FATAL) << "unknown predicate type";
 }
 
+bool ColumnPredicate::EvaluateCell(DataType type, const void* cell) const {
+  switch (type) {
+    case BOOL: return EvaluateCell<BOOL>(cell);
+    case INT8: return EvaluateCell<INT8>(cell);
+    case INT16: return EvaluateCell<INT16>(cell);
+    case INT32: return EvaluateCell<INT32>(cell);
+    case INT64: return EvaluateCell<INT64>(cell);
+    case UINT8: return EvaluateCell<UINT8>(cell);
+    case UINT16: return EvaluateCell<UINT16>(cell);
+    case UINT32: return EvaluateCell<UINT32>(cell);
+    case UINT64: return EvaluateCell<UINT64>(cell);
+    case FLOAT: return EvaluateCell<FLOAT>(cell);
+    case DOUBLE: return EvaluateCell<DOUBLE>(cell);
+    case BINARY: return EvaluateCell<BINARY>(cell);
+    default: LOG(FATAL) << "unknown physical type: " << GetTypeInfo(type)->name();
+  }
+}
+
 void ColumnPredicate::Evaluate(const ColumnBlock& block, SelectionVector* sel) const {
   DCHECK(sel);
   switch (block.type_info()->physical_type()) {
@@ -534,7 +552,7 @@ void ColumnPredicate::Evaluate(const ColumnBlock& block, SelectionVector* sel) c
     case FLOAT: return EvaluateForPhysicalType<FLOAT>(block, sel);
     case DOUBLE: return EvaluateForPhysicalType<DOUBLE>(block, sel);
     case BINARY: return EvaluateForPhysicalType<BINARY>(block, sel);
-    default: LOG(FATAL) << "unknown physical type: " << block.type_info()->physical_type();
+    default: LOG(FATAL) << "unknown physical type: " << block.type_info()->name();
   }
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2ad6e28/src/kudu/common/column_predicate.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/column_predicate.h b/src/kudu/common/column_predicate.h
index aa996a1..2466b3a 100644
--- a/src/kudu/common/column_predicate.h
+++ b/src/kudu/common/column_predicate.h
@@ -182,6 +182,10 @@ class ColumnPredicate {
     LOG(FATAL) << "unknown predicate type";
   }
 
+  // Evaluate the predicate on a single cell. Used if the physical type is only known at run-time.
+  // Otherwise, use EvaluateCell<DataType>.
+  bool EvaluateCell(DataType type, const void* cell) const;
+
   // Print the predicate for debugging.
   std::string ToString() const;
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2ad6e28/src/kudu/common/partial_row.h
----------------------------------------------------------------------
diff --git a/src/kudu/common/partial_row.h b/src/kudu/common/partial_row.h
index e0e89c9..c252fce 100644
--- a/src/kudu/common/partial_row.h
+++ b/src/kudu/common/partial_row.h
@@ -42,6 +42,11 @@ class KuduWriteOperation;
 template<typename KeyTypeWrapper> struct SliceKeysTestSetup;
 template<typename KeyTypeWrapper> struct IntKeysTestSetup;
 } // namespace client
+
+namespace tablet {
+template<typename KeyTypeWrapper> struct SliceTypeRowOps;
+template<typename KeyTypeWrapper> struct NumTypeRowOps;
+}  // namespace tablet
 /// @endcond
 
 class Schema;
@@ -478,6 +483,8 @@ class KUDU_EXPORT KuduPartialRow {
   friend class TestScanSpec;
   template<typename KeyTypeWrapper> friend struct client::SliceKeysTestSetup;
   template<typename KeyTypeWrapper> friend struct client::IntKeysTestSetup;
+  template<typename KeyTypeWrapper> friend struct tablet::SliceTypeRowOps;
+  template<typename KeyTypeWarpper> friend struct tablet::NumTypeRowOps;
   FRIEND_TEST(TestPartitionPruner, TestPrimaryKeyRangePruning);
   FRIEND_TEST(TestPartitionPruner, TestPartialPrimaryKeyRangePruning);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2ad6e28/src/kudu/tablet/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/CMakeLists.txt b/src/kudu/tablet/CMakeLists.txt
index 9c8bc13..49fafef 100644
--- a/src/kudu/tablet/CMakeLists.txt
+++ b/src/kudu/tablet/CMakeLists.txt
@@ -96,6 +96,7 @@ ADD_KUDU_TEST(compaction_policy-test
   # Can't use dist-test because it relies on a data file.
   LABELS no_dist_test)
 
+ADD_KUDU_TEST(all_types-scan-correctness-test)
 ADD_KUDU_TEST(diskrowset-test)
 ADD_KUDU_TEST(mt-diskrowset-test RUN_SERIAL true)
 ADD_KUDU_TEST(memrowset-test)

http://git-wip-us.apache.org/repos/asf/kudu/blob/b2ad6e28/src/kudu/tablet/all_types-scan-correctness-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/all_types-scan-correctness-test.cc b/src/kudu/tablet/all_types-scan-correctness-test.cc
new file mode 100644
index 0000000..4f5832b
--- /dev/null
+++ b/src/kudu/tablet/all_types-scan-correctness-test.cc
@@ -0,0 +1,527 @@
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include <kudu/util/flags.h>
+
+#include "kudu/common/schema.h"
+#include "kudu/tablet/tablet-test-base.h"
+
+namespace kudu {
+namespace tablet {
+
+// Column numbers; kColX refers to column with name "val_x".
+static const int kColA = 1;
+static const int kColB = 2;
+static const int kColC = 3;
+
+// Rows added after altering and before altering the table.
+static const int kNumAddedRows = 2000;
+static const int kNumBaseRows = 10000;
+
+// Number of unique values in each column.
+static const int kCardinality = 21;
+
+// Lower and upper bounds of the predicates.
+static const int kLower = 3;
+static const int kUpper = 10;
+
+// Number of elements to allocate memory for. Each range predicate requires two elements, and each
+// call to AddColumn(read_default) requires one. Must be greater than the number of elements
+// actually needed.
+static const int kNumAllocatedElements = 32;
+
+// Strings and binaries will have the following string length.
+static const int kStrlen = 10;
+
+struct RowOpsBase {
+  RowOpsBase(DataType type, EncodingType encoding) : type_(type), encoding_(encoding) {
+    schema_ = Schema({ColumnSchema("key", INT32),
+                     ColumnSchema("val_a", type, true, nullptr, nullptr,
+                         ColumnStorageAttributes(encoding, DEFAULT_COMPRESSION)),
+                     ColumnSchema("val_b", type, true, nullptr, nullptr,
+                         ColumnStorageAttributes(encoding, DEFAULT_COMPRESSION))}, 1);
+
+  }
+  DataType type_;
+  EncodingType encoding_;
+  Schema schema_;
+  Schema altered_schema_;
+};
+
+template<typename KeyTypeWrapper>
+struct SliceTypeRowOps : public RowOpsBase {
+  SliceTypeRowOps() : RowOpsBase(KeyTypeWrapper::type, KeyTypeWrapper::encoding),
+    strs_(kNumAllocatedElements), slices_(kNumAllocatedElements), cur(0) {}
+
+  // Assumes the string representation of n is under strlen characters.
+  std::string LeftZeroPadded(int n, int strlen) {
+    return StringPrintf(Substitute("%0$0$1", strlen, PRId64).c_str(), static_cast<int64_t>(n));
+  }
+
+  void GenerateRow(int value, bool altered, KuduPartialRow* row) {
+    if (value < 0) {
+      CHECK_OK(row->SetNull(kColA));
+      CHECK_OK(row->SetNull(kColB));
+      if (altered) {
+        CHECK_OK(row->SetNull(kColC));
+      }
+    } else {
+      std::string string_a = LeftZeroPadded(value, 10);
+      std::string string_b = LeftZeroPadded(value, 10);
+      std::string string_c = LeftZeroPadded(value, 10);
+      Slice slice_a(string_a);
+      Slice slice_b(string_b);
+      Slice slice_c(string_c);
+      CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(kColA, slice_a));
+      CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(kColB, slice_b));
+      if (altered) {
+        CHECK_OK(row->SetSliceCopy<TypeTraits<KeyTypeWrapper::type>>(kColC, slice_c));
+      }
+    }
+  }
+
+  void* GenerateElement(int value) {
+    strs_[cur] = LeftZeroPadded(value, kStrlen);
+    slices_[cur] = Slice(strs_[cur]);
+    return &slices_[cur++];
+  }
+
+  ColumnPredicate GenerateRangePredicate(const Schema& schema, size_t col, int lower, int upper) {
+    // Key predicate strings and slices in scope in a vector.
+    strs_[cur] = LeftZeroPadded(lower, 10);
+    strs_[cur + 1] = LeftZeroPadded(upper, 10);
+    slices_[cur] = Slice(strs_[cur]);
+    slices_[cur + 1] = Slice(strs_[cur + 1]);
+    auto pred = ColumnPredicate::Range(schema.column(col), &slices_[cur], &slices_[cur + 1]);
+    cur += 2;
+    return pred;
+  }
+
+  // To avoid issues where either vector is resized, potentially disrupting the correspondence of
+  // address/Slice to string, preallocate the sizes of these vectors as needed.
+  std::vector<std::string> strs_;
+  std::vector<Slice> slices_;
+  size_t cur;
+};
+
+template<typename KeyTypeWrapper>
+struct NumTypeRowOps : public RowOpsBase {
+  NumTypeRowOps() : RowOpsBase(KeyTypeWrapper::type, KeyTypeWrapper::encoding),
+    nums_(kNumAllocatedElements), cur(0) {}
+
+  typedef typename TypeTraits<KeyTypeWrapper::type>::cpp_type CppType;
+
+  void GenerateRow(int value, bool altered, KuduPartialRow* row) {
+    if (value < 0) {
+      CHECK_OK(row->SetNull(kColA));
+      CHECK_OK(row->SetNull(kColB));
+      if (altered) {
+        CHECK_OK(row->SetNull(kColC));
+      }
+    } else {
+      row->Set<TypeTraits<KeyTypeWrapper::type>>(kColA, value);
+      row->Set<TypeTraits<KeyTypeWrapper::type>>(kColB, value);
+      if (altered) {
+        row->Set<TypeTraits<KeyTypeWrapper::type>>(kColC, value);
+      }
+    }
+  }
+
+  void* GenerateElement(int value) {
+    nums_[cur] = value;
+    return &nums_[cur++];
+  }
+
+  ColumnPredicate GenerateRangePredicate(const Schema& schema, size_t col, int lower, int upper) {
+    nums_[cur] = lower;
+    nums_[cur + 1] = upper;
+    auto pred = ColumnPredicate::Range(schema.column(col), &nums_[cur], &nums_[cur + 1]);
+    cur += 2;
+    return pred;
+  }
+
+  // To avoid issues where the vector is resized, potentially disrupting the correspondence of
+  // address to value, preallocate the sizes of this vector as needed.
+  std::vector<CppType> nums_;
+  size_t cur;
+};
+
+// Calculates the number of values in the range [lower, upper) given a repeated, completely
+// non-null pattern with the specified cardinality and the specified number of rows.
+int ExpectedCount(int nrows, int cardinality, int lower_val, int upper_val) {
+  if (lower_val >= upper_val || lower_val >= cardinality) {
+    return 0;
+  }
+  int lower = std::max(0, lower_val);
+  int upper = std::min(cardinality, upper_val);
+  int last_chunk_count = 0;
+  int last_chunk_size = nrows % cardinality;
+
+  if (last_chunk_size == 0 || last_chunk_size <= lower) {
+    // e.g. lower: 3, upper: 8, cardinality:10, nrows: 23, last_chunk_size: 3
+    // Resulting vector: [0001111100 0001111100 000]
+    last_chunk_count = 0;
+  } else if (last_chunk_size <= upper) {
+    // e.g. lower: 3, upper: 8, cardinality:10, nrows: 25, last_chunk_size: 5
+    // Resulting vector: [0001111100 0001111100 00011]
+    last_chunk_count = last_chunk_size - lower;
+  } else {
+    // e.g. lower: 3, upper: 8, cardinality:10, nrows: 29, last_chunk_size: 9
+    // Resulting vector: [0001111100 0001111100 000111110]
+    last_chunk_count = upper - lower;
+  }
+  return (nrows / cardinality) * (upper - lower) + last_chunk_count;
+}
+
+std::string TraceMsg(const std::string& test_name,  int expected, int actual) {
+  std::ostringstream ss;
+  ss << test_name << " Scan Results: " << expected << " expected, " << actual << " returned.";
+  return ss.str();
+}
+
+// Tests for correctness by running predicates on repeated patterns of rows, specified by nrows,
+// cardinality, and null_upper. RowOps is a specialized RowOpsBase, defining how the tablet should
+// add rows and generate predicates for various types.
+template <class RowOps>
+class AllTypesScanCorrectnessTest : public KuduTabletTest {
+public:
+  AllTypesScanCorrectnessTest() : KuduTabletTest(RowOps().schema_), rowops_(RowOps()),
+      schema_(rowops_.schema_), altered_schema_(schema_),
+      base_nrows_(0), base_cardinality_(0), base_null_upper_(0), added_nrows_(0) {}
+
+  void SetUp() override {
+    KuduTabletTest::SetUp();
+  }
+
+  // Adds a pattern of repeated values with the first "null_upper" of every "cardinality" rows
+  // being set to null.
+  // E.g. nrows: 9, cardinality: 5, null_upper: 2
+  // [-, -, 2, 3, 4, -, -, 2, 3]
+  void FillTestTablet(int nrows, int cardinality, int null_upper) {
+    base_nrows_ = nrows;
+    base_cardinality_ = cardinality;
+    base_null_upper_ = null_upper;
+    LocalTabletWriter writer(tablet().get(), &client_schema_);
+    KuduPartialRow row(&client_schema_);
+    for (int i = 0; i < nrows; i++) {
+      CHECK_OK(row.SetInt32(0, i));
+
+      // Populate the bottom of the repeating pattern with NULLs.
+      // Note: Non-positive null_upper will yield a completely non-NULL column.
+      if (i % cardinality < null_upper) {
+        rowops_.GenerateRow(-1, false, &row);
+      } else {
+        rowops_.GenerateRow(i % cardinality, false, &row);
+      }
+      ASSERT_OK_FAST(writer.Insert(row));
+    }
+    ASSERT_OK(tablet()->Flush());
+  }
+
+  // Adds the above pattern to the table with keys starting after the base rows.
+  void FillAlteredTestTablet(int nrows) {
+    added_nrows_ = nrows;
+    LocalTabletWriter writer(tablet().get(), &altered_schema_);
+    KuduPartialRow row(&altered_schema_);
+    for (int i = 0; i < nrows; i++) {
+      CHECK_OK(row.SetInt32(0, base_nrows_ + i));
+      if (i % base_cardinality_ < base_null_upper_) {
+        rowops_.GenerateRow(-1, true, &row);
+      } else {
+        rowops_.GenerateRow(i % base_cardinality_, true, &row);
+      }
+      ASSERT_OK_FAST(writer.Upsert(row));
+    }
+    ASSERT_OK(tablet()->Flush());
+  }
+
+  // Adds a column called "val_c" to the schema with the specified read-default.
+  void AddColumn(int read_default) {
+    void* default_ptr;
+    if (read_default < 0) {
+      default_ptr = nullptr;
+    } else {
+      default_ptr = rowops_.GenerateElement(read_default);
+    }
+    SchemaBuilder builder(tablet()->metadata()->schema());
+    builder.RemoveColumn("val_c");
+    ASSERT_OK(builder.AddColumn("val_c", rowops_.type_, true, default_ptr, nullptr));
+    AlterSchema(builder.Build());
+    altered_schema_ = Schema({ColumnSchema("key", INT32),
+                     ColumnSchema("val_a", rowops_.type_, true, nullptr, nullptr,
+                         ColumnStorageAttributes(rowops_.encoding_, DEFAULT_COMPRESSION)),
+                     ColumnSchema("val_b", rowops_.type_, true, nullptr, nullptr,
+                         ColumnStorageAttributes(rowops_.encoding_, DEFAULT_COMPRESSION)),
+                     ColumnSchema("val_c", rowops_.type_, true, default_ptr, nullptr,
+                         ColumnStorageAttributes(rowops_.encoding_, DEFAULT_COMPRESSION))}, 1);
+  }
+
+  // Scan the results of a query. Set "count" to the number of results satisfying the predicates.
+  // ScanSpec must have all desired predicates already added to it.
+  void ScanWithSpec(const Schema& schema, ScanSpec spec, int* count) {
+    Arena arena(1028, 1028);
+    AutoReleasePool pool;
+    *count = 0;
+    spec.OptimizeScan(schema, &arena, &pool, true);
+    gscoped_ptr<RowwiseIterator> iter;
+    ASSERT_OK(tablet()->NewRowIterator(schema, &iter));
+    ASSERT_OK(iter->Init(&spec));
+    ASSERT_TRUE(spec.predicates().empty()) << "Should have accepted all predicate.";
+    ASSERT_OK(SilentIterateToStringList(iter.get(), count));
+  }
+
+  void RunTests() {
+    RunUnalteredTabletTests();
+    RunAlteredTabletTests();
+  }
+
+  // Runs queries on an un-altered table. Correctness is determined by comparing the number of rows
+  // returned with the number of rows expected by each query.
+  void RunUnalteredTabletTests() {
+    int lower_non_null = kLower;
+    if (kLower < base_null_upper_) {
+      lower_non_null = base_null_upper_;
+    }
+
+    int count = 0;
+    {
+      ScanSpec spec;
+      auto pred = rowops_.GenerateRangePredicate(schema_, kColA, kLower, kUpper);
+      spec.AddPredicate(pred);
+      ScanWithSpec(schema_, spec, &count);
+      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, lower_non_null, kUpper);
+      SCOPED_TRACE(TraceMsg("Range", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      // MultiColumn Range scan
+      // This predicates two columns:
+      //   col_a: [0, upper_val)
+      //   col_b: [lower_val, cardinality)
+      // Since the two columns have identical data, the result will be:
+      //   AND:   [lower_val, upper_val)
+      ScanSpec spec;
+      ColumnPredicate pred_a = rowops_.GenerateRangePredicate(schema_, kColA, 0, kUpper);
+      spec.AddPredicate(pred_a);
+      ColumnPredicate pred_b = rowops_.GenerateRangePredicate(schema_, kColB, kLower,
+                                                              base_cardinality_);
+      spec.AddPredicate(pred_b);
+      ScanWithSpec(schema_, spec, &count);
+      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, lower_non_null, kUpper);
+      SCOPED_TRACE(TraceMsg("MultiColumn Range", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      ScanSpec spec;
+      auto pred = ColumnPredicate::IsNotNull(schema_.column(kColB));
+      spec.AddPredicate(pred);
+      ScanWithSpec(schema_, spec, &count);
+      int expected_count = ExpectedCount(base_nrows_, base_cardinality_, base_null_upper_,
+          base_cardinality_);
+      SCOPED_TRACE(TraceMsg("IsNotNull", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+  }
+
+  // Runs tests with an altered table. Queries are run with different read-defaults and are deemed
+  // correct if they return the correct number of results.
+  void RunAlteredTabletTests() {
+    int lower_non_null = kLower;
+    // Determine the lowest non-null value in the data range. Used in getting expected counts.
+    if (kLower < base_null_upper_) {
+      lower_non_null = base_null_upper_;
+    }
+    // Tests with null read-default. Ex. case: (-: null, 1: pred satisfied, 0: pred not satisfied)
+    // kLower: 5, kUpper: 8
+    // Base nrows: 30                 |Altered nrows: 25
+    // Cardinality: 20, null_upper: 3, read_default: NULL
+    // Predicate: (val_b >= 0 && val_b < 8) && (val_c >= 5 && val_c < 20)
+    //  0    5    10   15   20   25   |30   35   40   45   50     key
+    // [---11111000000000000---1111100|---11111000000000000---11] val_b
+    // [------------------------------|---00111111111111111---00] val_c
+    // [000000000000000000000000000000|0000011100000000000000000] Result
+    AddColumn(-1);
+    FillAlteredTestTablet(kNumAddedRows);
+    int count = 0;
+    {
+      ScanSpec spec;
+      // val_b >= kLower && val_b <= kUpper && val_c is not null
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, kLower, kUpper);
+      auto pred_c = ColumnPredicate::IsNotNull(altered_schema_.column(kColC));
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // Since the table has a null read-default on the added column, the IsNotNull predicate
+      // should filter out all rows in the base data.
+      int base_expected_count = 0;
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Null default, Range+IsNotNull", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      ScanSpec spec;
+      // val_b >= 0 && val_b < kUpper && val_c >= kLower && val_c < cardinality
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 0, kUpper);
+      auto pred_c = rowops_.GenerateRangePredicate(altered_schema_, kColC, kLower,
+                                                   base_cardinality_);
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // Since the added column has a null read-default, the base rows will be completely filtered.
+      int base_expected_count = 0;
+      // The added data will be predicated with [lower, upper).
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Null default, Range", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    // Tests with non-null read-default. Ex. case:
+    // kLower: 5, kUpper: 8
+    // Base nrows: 30                |Altered nrows: 25
+    // Cardinality: 20, null_upper: 3, read_default: 7
+    // Predicate: (val_b >= 0 && val_b < 8) && (val_c >= 5 && val_c < 20)
+    //  0    5    10   15   20   25   |30   35   40   45   50     key
+    // [---11111000000000000---1111100|---11111000000000000---11] val_b
+    // [111111111111111111111111111111|---00111111111111111---00] val_c
+    // [000111110000000000000001111100|0000011100000000000000000] Result
+    int read_default = 7;
+    AddColumn(read_default);
+    FillAlteredTestTablet(kNumAddedRows);
+    {
+      ScanSpec spec;
+      // val_b >= kLower && val_b <= kUpper && val_c is not null
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, kLower, kUpper);
+      auto pred_c = ColumnPredicate::IsNotNull(altered_schema_.column(kColC));
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      int base_expected_count = ExpectedCount(base_nrows_, base_cardinality_, lower_non_null,
+                                              kUpper);
+      // Since the new column has the same data as the base columns, IsNotNull with a Range
+      // predicate should yield the same rows as the Range query alone on the altered data.
+      int altered_expected_count = ExpectedCount(added_nrows_, base_cardinality_, lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range+IsNotNull", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      int lower = 0;
+      ScanSpec spec;
+      // val_b >= 0 && val_b < kUpper && val_c >= kLower && val_c < cardinality
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 0, kUpper);
+      auto pred_c = rowops_.GenerateRangePredicate(altered_schema_, kColC, kLower,
+                                                   base_cardinality_);
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      // Take into account possible null values when calculating expected counts.
+      if (base_null_upper_ > 0) {
+        lower = base_null_upper_;
+      }
+      // Because the read_default is in range, the predicate on "val_c" will be satisfied
+      // by base data, and all rows that satisfy "pred_b" will be returned.
+      int base_expected_count = ExpectedCount(base_nrows_, base_cardinality_, lower, kUpper);
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 lower_non_null,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range with Default", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+    {
+      // Used to ensure the query does not select the read-default.
+      int default_plus_one = read_default + 1;
+      ScanSpec spec;
+      // val_b >= 0 && val_b < kUpper && val_c >= read_default + 1 && val_c < cardinality
+      auto pred_b = rowops_.GenerateRangePredicate(altered_schema_, kColB, 0, kUpper);
+      auto pred_c = rowops_.GenerateRangePredicate(altered_schema_, kColC, default_plus_one,
+                                                   base_cardinality_);
+      spec.AddPredicate(pred_b);
+      spec.AddPredicate(pred_c);
+      ScanWithSpec(altered_schema_, spec, &count);
+      if (default_plus_one < base_null_upper_) {
+        default_plus_one = base_null_upper_;
+      }
+      // Because the read_default is out of range, the "pred_c" will not be satisfied by base data,
+      // so all base rows will be filtered.
+      int base_expected_count = 0;
+      int altered_expected_count = ExpectedCount(added_nrows_,
+                                                 base_cardinality_,
+                                                 default_plus_one,
+                                                 kUpper);
+      int expected_count = base_expected_count + altered_expected_count;
+      SCOPED_TRACE(TraceMsg("Non-null default, Range without Default", expected_count, count));
+      ASSERT_EQ(expected_count, count);
+    }
+  }
+
+protected:
+  RowOps rowops_;
+  Schema schema_;
+  Schema altered_schema_;
+  int base_nrows_;
+  int base_cardinality_;
+  int base_null_upper_;
+  int added_nrows_;
+};
+
+template<DataType KeyType, EncodingType Encoding>
+struct KeyTypeWrapper {
+  static const DataType type = KeyType;
+  static const EncodingType encoding = Encoding;
+};
+
+typedef ::testing::Types<NumTypeRowOps<KeyTypeWrapper<INT8, BIT_SHUFFLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT8, PLAIN_ENCODING>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT8, RLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT16, BIT_SHUFFLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT16, PLAIN_ENCODING>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT16, RLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT32, BIT_SHUFFLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT32, PLAIN_ENCODING>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT32, RLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT64, BIT_SHUFFLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<INT64, PLAIN_ENCODING>>,
+                         NumTypeRowOps<KeyTypeWrapper<FLOAT, BIT_SHUFFLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<FLOAT, PLAIN_ENCODING>>,
+                         NumTypeRowOps<KeyTypeWrapper<DOUBLE, BIT_SHUFFLE>>,
+                         NumTypeRowOps<KeyTypeWrapper<DOUBLE, PLAIN_ENCODING>>,
+                         SliceTypeRowOps<KeyTypeWrapper<STRING, DICT_ENCODING>>,
+                         SliceTypeRowOps<KeyTypeWrapper<STRING, PLAIN_ENCODING>>,
+                         SliceTypeRowOps<KeyTypeWrapper<STRING, PREFIX_ENCODING>>,
+                         SliceTypeRowOps<KeyTypeWrapper<BINARY, DICT_ENCODING>>,
+                         SliceTypeRowOps<KeyTypeWrapper<BINARY, PLAIN_ENCODING>>,
+                         SliceTypeRowOps<KeyTypeWrapper<BINARY, PREFIX_ENCODING>>
+                         > KeyTypes;
+
+TYPED_TEST_CASE(AllTypesScanCorrectnessTest, KeyTypes);
+
+TYPED_TEST(AllTypesScanCorrectnessTest, AllNonNull) {
+  int null_upper = 0;
+  this->FillTestTablet(kNumBaseRows, kCardinality, null_upper);
+  this->RunTests();
+}
+
+TYPED_TEST(AllTypesScanCorrectnessTest, SomeNull) {
+  int null_upper = kUpper/2;
+  this->FillTestTablet(kNumBaseRows, kCardinality, null_upper);
+  this->RunTests();
+}
+
+TYPED_TEST(AllTypesScanCorrectnessTest, AllNull) {
+  int null_upper = kCardinality;
+  this->FillTestTablet(kNumBaseRows, kCardinality, null_upper);
+  this->RunTests();
+}
+
+}  // namespace tablet
+}  // namespace kudu


[2/2] kudu git commit: KUDU-1905 - Allow reinserts on pk only tables

Posted by mp...@apache.org.
KUDU-1905 - Allow reinserts on pk only tables

Doing a reinsert to a table that has only primary key columns
results in an empty change list. We're currently crashing whenever
we see a empty changelist that is not a delete.

The fix is just to allow empty changelists for reinserts.
This also adds a new flavor of fuzz tests to fuzz-itest.cc
that only have pk-only operations, as well as a directed
regression test that would trigger the problem deterministically.

Ran fuzz-itest in dist-tests with the new tests and the following
command:
KUDU_ALLOW_SLOW_TESTS=1 build-support/dist_test.py --collect-tmpdir \
loop -n 1000 build/debug/bin/fuzz-itest --gtest_repeat=10 \
--gtest_break_on_failure

Tests passed 1000/1000. Results:
http://dist-test.cloudera.org//job?job_id=david.alves.1488580839.22665

Change-Id: I7ce03378c7b97fac8ad8cb7783dec4a1b0277344
Reviewed-on: http://gerrit.cloudera.org:8080/6258
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <to...@apache.org>
(cherry picked from commit fff5cbda80b6cc2016c4bce012d2e183d3c3e2bb)
Reviewed-on: http://gerrit.cloudera.org:8080/6270


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/50039911
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/50039911
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/50039911

Branch: refs/heads/branch-1.2.x
Commit: 50039911b2164a489018aa17453ad4c9f445346e
Parents: b2ad6e2
Author: David Alves <dr...@apache.org>
Authored: Fri Mar 3 14:17:58 2017 -0800
Committer: Todd Lipcon <to...@apache.org>
Committed: Mon Mar 6 19:37:53 2017 +0000

----------------------------------------------------------------------
 src/kudu/common/row_changelist-test.cc   |   6 --
 src/kudu/common/row_changelist.cc        |   6 +-
 src/kudu/integration-tests/fuzz-itest.cc | 137 +++++++++++++++++++++-----
 3 files changed, 118 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/50039911/src/kudu/common/row_changelist-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/row_changelist-test.cc b/src/kudu/common/row_changelist-test.cc
index 9523322..6b934a2 100644
--- a/src/kudu/common/row_changelist-test.cc
+++ b/src/kudu/common/row_changelist-test.cc
@@ -199,12 +199,6 @@ TEST_F(TestRowChangeList, TestInvalid_TooLongDelete) {
                       "Corruption: DELETE changelist too long");
 }
 
-TEST_F(TestRowChangeList, TestInvalid_TooShortReinsert) {
-  RowChangeListDecoder decoder(RowChangeList(Slice("\x03")));
-  ASSERT_STR_CONTAINS(decoder.Init().ToString(),
-                      "Corruption: empty changelist - expected column updates");
-}
-
 TEST_F(TestRowChangeList, TestInvalid_SetNullForNonNullableColumn) {
   faststring buf;
   RowChangeListEncoder rcl(&buf);

http://git-wip-us.apache.org/repos/asf/kudu/blob/50039911/src/kudu/common/row_changelist.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/row_changelist.cc b/src/kudu/common/row_changelist.cc
index 82b6cc3..cd73d46 100644
--- a/src/kudu/common/row_changelist.cc
+++ b/src/kudu/common/row_changelist.cc
@@ -158,9 +158,11 @@ Status RowChangeListDecoder::Init() {
 
   remaining_.remove_prefix(1);
 
-  // We should discard empty REINSERT/UPDATE RowChangeLists, so if after getting
+  // We should discard empty UPDATE RowChangeLists, so if after getting
   // the type remaining_ is empty() return an error.
-  if (!is_delete() && remaining_.empty()) {
+  // Note that REINSERTs might have empty changelists when reinserting a row on a tablet that
+  // has only primary key columns.
+  if (is_update() && remaining_.empty()) {
     return Status::Corruption("empty changelist - expected column updates");
   }
   return Status::OK();

http://git-wip-us.apache.org/repos/asf/kudu/blob/50039911/src/kudu/integration-tests/fuzz-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/fuzz-itest.cc b/src/kudu/integration-tests/fuzz-itest.cc
index 00cd2ec..e59c94f 100644
--- a/src/kudu/integration-tests/fuzz-itest.cc
+++ b/src/kudu/integration-tests/fuzz-itest.cc
@@ -53,6 +53,7 @@ DECLARE_bool(use_hybrid_clock);
 // the fuzz test.
 enum TestOpType {
   TEST_INSERT,
+  TEST_INSERT_PK_ONLY,
   TEST_UPSERT,
   TEST_UPSERT_PK_ONLY,
   TEST_UPDATE,
@@ -101,6 +102,7 @@ namespace tablet {
 
 const char* TestOpType_names[] = {
   "TEST_INSERT",
+  "TEST_INSERT_PK_ONLY",
   "TEST_UPSERT",
   "TEST_UPSERT_PK_ONLY",
   "TEST_UPDATE",
@@ -130,6 +132,33 @@ struct TestOp {
   }
 };
 
+const vector<TestOpType> kAllOps {TEST_INSERT,
+                                  TEST_INSERT_PK_ONLY,
+                                  TEST_UPSERT,
+                                  TEST_UPSERT_PK_ONLY,
+                                  TEST_UPDATE,
+                                  TEST_DELETE,
+                                  TEST_FLUSH_OPS,
+                                  TEST_FLUSH_TABLET,
+                                  TEST_FLUSH_DELTAS,
+                                  TEST_MINOR_COMPACT_DELTAS,
+                                  TEST_MAJOR_COMPACT_DELTAS,
+                                  TEST_COMPACT_TABLET,
+                                  TEST_RESTART_TS,
+                                  TEST_SCAN_AT_TIMESTAMP};
+
+const vector<TestOpType> kPkOnlyOps {TEST_INSERT_PK_ONLY,
+                                     TEST_UPSERT_PK_ONLY,
+                                     TEST_DELETE,
+                                     TEST_FLUSH_OPS,
+                                     TEST_FLUSH_TABLET,
+                                     TEST_FLUSH_DELTAS,
+                                     TEST_MINOR_COMPACT_DELTAS,
+                                     TEST_MAJOR_COMPACT_DELTAS,
+                                     TEST_COMPACT_TABLET,
+                                     TEST_RESTART_TS,
+                                     TEST_SCAN_AT_TIMESTAMP};
+
 // Test which does only random operations against a tablet, including update and random
 // get (ie scans with equal lower and upper bounds).
 //
@@ -142,11 +171,10 @@ class FuzzTest : public KuduTest {
     FLAGS_enable_maintenance_manager = false;
     FLAGS_use_hybrid_clock = false;
     FLAGS_scanner_allow_snapshot_scans_with_logical_timestamps = true;
-
-    schema_ = client::KuduSchemaFromSchema(CreateKeyValueTestSchema());
   }
 
-  void SetUp() override {
+  void CreateTabletAndStartClusterWithSchema(const Schema& schema) {
+    schema_ =  client::KuduSchemaFromSchema(schema);
     KuduTest::SetUp();
 
     MiniClusterOptions opts;
@@ -175,8 +203,8 @@ class FuzzTest : public KuduTest {
   }
 
   void TearDown() override {
-    tablet_peer_.reset();
-    cluster_->Shutdown();
+    if (tablet_peer_) tablet_peer_.reset();
+    if (cluster_) cluster_->Shutdown();
   }
 
   scoped_refptr<TabletPeer> LookupTabletPeer() {
@@ -210,7 +238,7 @@ class FuzzTest : public KuduTest {
                                         TestOpType type) {
     ExpectedKeyValueRow ret;
     unique_ptr<KuduWriteOperation> op;
-    if (type == TEST_INSERT) {
+    if (type == TEST_INSERT || type == TEST_INSERT_PK_ONLY) {
       op.reset(table_->NewInsert());
     } else {
       op.reset(table_->NewUpsert());
@@ -218,17 +246,26 @@ class FuzzTest : public KuduTest {
     KuduPartialRow* row = op->mutable_row();
     CHECK_OK(row->SetInt32(0, key));
     ret.key = key;
-    if (type != TEST_UPSERT_PK_ONLY) {
-      if (val & 1) {
-        CHECK_OK(row->SetNull(1));
-      } else {
-        CHECK_OK(row->SetInt32(1, val));
-        ret.val = val;
+    switch (type) {
+      case TEST_INSERT:
+      case TEST_UPSERT: {
+        if (val & 1) {
+          CHECK_OK(row->SetNull(1));
+        } else {
+          CHECK_OK(row->SetInt32(1, val));
+          ret.val = val;
+        }
+        break;
       }
-    } else {
-      // For "upsert PK only", we expect the row to keep its old value
-      // the row existed, or NULL if there was no old row.
-      ret.val = old_row ? old_row->val : boost::none;
+      case TEST_INSERT_PK_ONLY:
+        break;
+      case TEST_UPSERT_PK_ONLY: {
+        // For "upsert PK only", we expect the row to keep its old value if
+        // the row existed, or NULL if there was no old row.
+        ret.val = old_row ? old_row->val : boost::none;
+        break;
+      }
+      default: LOG(FATAL) << "Invalid test op type: " << TestOpType_names[type];
     }
     CHECK_OK(session_->Apply(op.release()));
     return ret;
@@ -275,7 +312,7 @@ class FuzzTest : public KuduTest {
       for (KuduScanBatch::RowPtr row : batch) {
         ExpectedKeyValueRow ret;
         CHECK_OK(row.GetInt32(0, &ret.key));
-        if (!row.IsNull(1)) {
+        if (schema_.num_columns() > 1 && !row.IsNull(1)) {
           ret.val = 0;
           CHECK_OK(row.GetInt32(1, ret.val.get_ptr()));
         }
@@ -355,7 +392,7 @@ class FuzzTest : public KuduTest {
       for (KuduScanBatch::RowPtr row : batch) {
         ExpectedKeyValueRow ret;
         ASSERT_OK(row.GetInt32(0, &ret.key));
-        if (!row.IsNull(1)) {
+        if (schema_.num_columns() > 1 && !row.IsNull(1)) {
           ret.val = 0;
           ASSERT_OK(row.GetInt32(1, ret.val.get_ptr()));
         }
@@ -392,9 +429,25 @@ class FuzzTest : public KuduTest {
   scoped_refptr<TabletPeer> tablet_peer_;
 };
 
+// The set of ops to draw from.
+enum TestOpSets {
+  ALL, // Pick an operation at random from all possible operations.
+  PK_ONLY // Pick an operation at random from the set of operations that apply only to the
+          // primary key (or that are now row-specific, like flushes or compactions).
+};
+
+TestOpType PickOpAtRandom(TestOpSets sets) {
+  switch (sets) {
+    case ALL:
+      return kAllOps[rand() % kAllOps.size()];
+    case PK_ONLY:
+      return kPkOnlyOps[rand() % kPkOnlyOps.size()];
+  }
+}
+
 // Generate a random valid sequence of operations for use as a
 // fuzz test.
-void GenerateTestCase(vector<TestOp>* ops, int len) {
+void GenerateTestCase(vector<TestOp>* ops, int len, TestOpSets sets = ALL) {
   vector<bool> exists(FLAGS_keyspace_size);
   int op_timestamps = 0;
   bool ops_pending = false;
@@ -403,12 +456,13 @@ void GenerateTestCase(vector<TestOp>* ops, int len) {
   bool data_in_dms = false;
   ops->clear();
   while (ops->size() < len) {
-    TestOpType r = tight_enum_cast<TestOpType>(rand() % enum_limits<TestOpType>::max_enumerator);
+    TestOpType r = PickOpAtRandom(sets);
     int row_key = rand() % FLAGS_keyspace_size;
     switch (r) {
       case TEST_INSERT:
+      case TEST_INSERT_PK_ONLY:
         if (exists[row_key]) continue;
-        ops->push_back({TEST_INSERT, row_key});
+        ops->push_back({r, row_key});
         exists[row_key] = true;
         ops_pending = true;
         data_in_mrs = true;
@@ -501,7 +555,7 @@ void GenerateTestCase(vector<TestOp>* ops, int len) {
         break;
       }
       default:
-        LOG(FATAL);
+        LOG(FATAL) << "Invalid op type: " << r;
     }
   }
 }
@@ -542,6 +596,7 @@ void FuzzTest::RunFuzzCase(const vector<TestOp>& test_ops,
   for (const TestOp& test_op : test_ops) {
     switch (test_op.type) {
       case TEST_INSERT:
+      case TEST_INSERT_PK_ONLY:
       case TEST_UPSERT:
       case TEST_UPSERT_PK_ONLY:
       case TEST_UPDATE:
@@ -554,6 +609,7 @@ void FuzzTest::RunFuzzCase(const vector<TestOp>& test_ops,
     LOG(INFO) << test_op.ToString();
     switch (test_op.type) {
       case TEST_INSERT:
+      case TEST_INSERT_PK_ONLY:
       case TEST_UPSERT:
       case TEST_UPSERT_PK_ONLY: {
         pending_val[test_op.val] = InsertOrUpsertRow(
@@ -611,7 +667,19 @@ void FuzzTest::RunFuzzCase(const vector<TestOp>& test_ops,
 // Generates a random test sequence and runs it.
 // The logs of this test are designed to easily be copy-pasted and create
 // more specific test cases like TestFuzz<N> below.
+TEST_F(FuzzTest, TestRandomFuzzPksOnly) {
+  CreateTabletAndStartClusterWithSchema(Schema({ColumnSchema("key", INT32)}, 1));
+  SeedRandom();
+  vector<TestOp> test_ops;
+  GenerateTestCase(&test_ops, AllowSlowTests() ? 1000 : 50, TestOpSets::PK_ONLY);
+  RunFuzzCase(test_ops);
+}
+
+// Generates a random test sequence and runs it.
+// The logs of this test are designed to easily be copy-pasted and create
+// more specific test cases like TestFuzz<N> below.
 TEST_F(FuzzTest, TestRandomFuzz) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   SeedRandom();
   vector<TestOp> test_ops;
   GenerateTestCase(&test_ops, AllowSlowTests() ? 1000 : 50);
@@ -622,6 +690,7 @@ TEST_F(FuzzTest, TestRandomFuzz) {
 // This results in very large batches which are likely to span multiple delta blocks
 // when flushed.
 TEST_F(FuzzTest, TestRandomFuzzHugeBatches) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   SeedRandom();
   vector<TestOp> test_ops;
   GenerateTestCase(&test_ops, AllowSlowTests() ? 500 : 50);
@@ -636,6 +705,7 @@ TEST_F(FuzzTest, TestRandomFuzzHugeBatches) {
 }
 
 TEST_F(FuzzTest, TestFuzz1) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   vector<TestOp> test_ops = {
       // Get an inserted row in a DRS.
       {TEST_INSERT, 0},
@@ -661,6 +731,7 @@ TEST_F(FuzzTest, TestFuzz1) {
 
 // A particular test case which previously failed TestFuzz.
 TEST_F(FuzzTest, TestFuzz2) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   vector<TestOp> test_ops = {
     {TEST_INSERT, 0},
     {TEST_DELETE, 0},
@@ -693,6 +764,7 @@ TEST_F(FuzzTest, TestFuzz2) {
 
 // A particular test case which previously failed TestFuzz.
 TEST_F(FuzzTest, TestFuzz3) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   vector<TestOp> test_ops = {
     {TEST_INSERT, 0},
     {TEST_FLUSH_OPS, 0},
@@ -727,6 +799,7 @@ TEST_F(FuzzTest, TestFuzz3) {
 
 // A particular test case which previously failed TestFuzz.
 TEST_F(FuzzTest, TestFuzz4) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   vector<TestOp> test_ops = {
     {TEST_INSERT, 0},
     {TEST_FLUSH_OPS, 0},
@@ -762,6 +835,7 @@ TEST_F(FuzzTest, TestFuzz4) {
 //  Actual: "()"
 //  Expected: "(" + cur_val + ")"
 TEST_F(FuzzTest, TestFuzzWithRestarts1) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   RunFuzzCase({
       {TEST_INSERT, 1},
       {TEST_FLUSH_OPS, 0},
@@ -788,6 +862,7 @@ TEST_F(FuzzTest, TestFuzzWithRestarts1) {
 // insert undo deltas in sorted order (ascending key, then descending ts):
 // got key (row 1@tx5965182714017464320) after (row 1@tx5965182713875046400)
 TEST_F(FuzzTest, TestFuzzWithRestarts2) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   RunFuzzCase({
       {TEST_INSERT, 0},
       {TEST_FLUSH_OPS, 0},
@@ -822,6 +897,7 @@ TEST_F(FuzzTest, TestFuzzWithRestarts2) {
 // Regression test for KUDU-1467: a sequence involving
 // UPSERT which failed to replay properly upon bootstrap.
 TEST_F(FuzzTest, TestUpsertSeq) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   RunFuzzCase({
       {TEST_INSERT, 1},
       {TEST_UPSERT, 1},
@@ -840,7 +916,8 @@ TEST_F(FuzzTest, TestUpsertSeq) {
 
 // Regression test for KUDU-1623: updates without primary key
 // columns specified can cause crashes and issues at restart.
-TEST_F(FuzzTest, TestUpsert_PKOnly) {
+TEST_F(FuzzTest, TestUpsert_PKOnlyOps) {
+  CreateTabletAndStartClusterWithSchema(CreateKeyValueTestSchema());
   RunFuzzCase({
       {TEST_INSERT, 1},
       {TEST_FLUSH_OPS, 0},
@@ -850,5 +927,19 @@ TEST_F(FuzzTest, TestUpsert_PKOnly) {
     });
 }
 
+// Regression test for KUDU-1905: reinserts to a tablet that has
+// only primary keys end up as empty change lists. We were previously
+// crashing when a changelist was empty.
+TEST_F(FuzzTest, TestUpsert_PKOnlySchema) {
+  CreateTabletAndStartClusterWithSchema(Schema({ColumnSchema("key", INT32)}, 1));
+  RunFuzzCase({
+      {TEST_UPSERT_PK_ONLY, 1},
+      {TEST_DELETE, 1},
+      {TEST_UPSERT_PK_ONLY, 1},
+      {TEST_UPSERT_PK_ONLY, 1},
+      {TEST_FLUSH_OPS, 0}
+     });
+}
+
 } // namespace tablet
 } // namespace kudu