You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by mp...@apache.org on 2016/08/11 20:45:07 UTC

[2/2] kudu git commit: Clarify that delta compaction is for REDOs

Clarify that delta compaction is for REDOs

This just clarifies docs and comments around delta compaction, as
it relates to the current implementation.

Change-Id: I6e7f3822f3216c53818c082a03f1a1e5c98639d8
Reviewed-on: http://gerrit.cloudera.org:8080/3944
Reviewed-by: Todd Lipcon <to...@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b811cbc7
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b811cbc7
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b811cbc7

Branch: refs/heads/master
Commit: b811cbc7ef1b6223d1458ff9cfb3fa9c5a5ea7db
Parents: 7a50894
Author: Mike Percy <mp...@apache.org>
Authored: Thu Aug 11 00:10:24 2016 -0700
Committer: Mike Percy <mp...@apache.org>
Committed: Thu Aug 11 20:44:27 2016 +0000

----------------------------------------------------------------------
 docs/design-docs/tablet.md       | 29 ++++++++++++++++-------------
 src/kudu/tablet/delta_tracker.cc |  3 +++
 src/kudu/tablet/delta_tracker.h  |  6 +++---
 src/kudu/tablet/deltafile.cc     |  2 +-
 src/kudu/tablet/tablet.h         |  2 +-
 5 files changed, 24 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b811cbc7/docs/design-docs/tablet.md
----------------------------------------------------------------------
diff --git a/docs/design-docs/tablet.md b/docs/design-docs/tablet.md
index 3b792c4..18a4f2f 100644
--- a/docs/design-docs/tablet.md
+++ b/docs/design-docs/tablet.md
@@ -412,13 +412,14 @@ keep their own "inserted_on" timestamp column, as they would in a traditional RD
 Types of Delta Compaction
 ============================================================
 
-A delta compaction may be classified as either 'minor' or 'major':
+A REDO delta compaction may be classified as either 'minor' or 'major':
 
-Minor delta compaction:
-------------------------
+Minor REDO delta compaction
+---------------------------
 
 A 'minor' compaction is one that does not include the base data. In this
 type of compaction, the resulting file is itself a delta file.
+
 ```
 +------------+      +---------+     +---------+     +---------+     +---------+
 | base data  | <--- | delta 0 + <-- | delta 1 + <-- | delta 2 + <-- | delta 3 +
@@ -435,14 +436,15 @@ type of compaction, the resulting file is itself a delta file.
                   compaction result
 ```
 
-Minor delta compactions serve only goals 1 and 3: because they do not read or re-write
-base data, they cannot transform REDO records into UNDO.
+Minor REDO delta compactions serve only goal 1: because they do not read or
+re-write base data, they cannot transform REDO records into UNDO.
+
+Major REDO delta compaction
+---------------------------
 
-Major delta compaction:
-------------------------
+A 'major' REDO compaction is one that includes the base data along with any
+number of REDO delta files.
 
-A 'major' compaction is one that includes the base data along with any number
-of delta files.
 ```
 +------------+      +---------+     +---------+     +---------+     +---------+
 | base data  | <--- | delta 0 + <-- | delta 1 + <-- | delta 2 + <-- | delta 3 +
@@ -458,11 +460,12 @@ of delta files.
 \____________________________________/
            compaction result
 ```
-Major delta compactions can satisfy all three goals of delta compactions, but cost
-more than than minor delta compactions since they must read and re-write the base data,
-which is typically larger than the delta data.
 
-A major delta compaction may be performed against any subset of the columns
+Major delta compactions satisfy delta compaction goals 1 and 2, but cost more
+than minor delta compactions since they must read and re-write the base
+data, which is typically larger than the delta data.
+
+A major REDO delta compaction may be performed against any subset of the columns
 in a DiskRowSet -- if only a single column has received a significant number of updates,
 then a compaction can be performed which only reads and rewrites that column. It is
 assumed that this is a common workload in many EDW-like applications (e.g updating

http://git-wip-us.apache.org/repos/asf/kudu/blob/b811cbc7/src/kudu/tablet/delta_tracker.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/delta_tracker.cc b/src/kudu/tablet/delta_tracker.cc
index d78e85e..04d6292 100644
--- a/src/kudu/tablet/delta_tracker.cc
+++ b/src/kudu/tablet/delta_tracker.cc
@@ -199,6 +199,9 @@ Status DeltaTracker::CompactStores(int start_idx, int end_idx) {
   //
   // TODO(perf): this could be more fine grained
   std::lock_guard<Mutex> l(compact_flush_lock_);
+
+  // At the time of writing, minor delta compaction only compacts REDO delta
+  // files, so we need at least 2 REDO delta stores to proceed.
   if (CountRedoDeltaStores() <= 1) {
     return Status::OK();
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/b811cbc7/src/kudu/tablet/delta_tracker.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/delta_tracker.h b/src/kudu/tablet/delta_tracker.h
index c60ee0a..f4be010 100644
--- a/src/kudu/tablet/delta_tracker.h
+++ b/src/kudu/tablet/delta_tracker.h
@@ -140,16 +140,16 @@ class DeltaTracker {
   // Sets *deleted to true if so; otherwise sets it to false.
   Status CheckRowDeleted(rowid_t row_idx, bool *deleted, ProbeStats* stats) const;
 
-  // Compacts all deltafiles
+  // Compacts all REDO delta files.
   //
   // TODO keep metadata in the delta stores to indicate whether or not
   // a minor (or -- when implemented -- major) compaction is warranted
   // and if so, compact the stores.
   Status Compact();
 
-  // Performs minor compaction on all delta files between index
+  // Performs minor compaction on all REDO delta files between index
   // "start_idx" and "end_idx" (inclusive) and writes this to a
-  // new delta block. If "end_idx" is set to -1, then delta files at
+  // new REDO delta block. If "end_idx" is set to -1, then delta files at
   // all indexes starting with "start_idx" will be compacted.
   Status CompactStores(int start_idx, int end_idx);
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/b811cbc7/src/kudu/tablet/deltafile.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/deltafile.cc b/src/kudu/tablet/deltafile.cc
index 2f56ad4..e86f970 100644
--- a/src/kudu/tablet/deltafile.cc
+++ b/src/kudu/tablet/deltafile.cc
@@ -271,7 +271,7 @@ Status DeltaFileReader::NewDeltaIterator(const Schema *projection,
         TRACE_COUNTER_INCREMENT("delta_iterators_lazy_initted", 1);
 
         VLOG(2) << (delta_type_ == REDO ? "REDO" : "UNDO") << " delta " << ToString()
-                << "has no delta stats"
+                << " has no delta stats"
                 << ": can't cull for " << snap.ToString();
       } else if (delta_type_ == REDO) {
         VLOG(2) << "REDO delta " << ToString()

http://git-wip-us.apache.org/repos/asf/kudu/blob/b811cbc7/src/kudu/tablet/tablet.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.h b/src/kudu/tablet/tablet.h
index 2eb5b31..7663cde 100644
--- a/src/kudu/tablet/tablet.h
+++ b/src/kudu/tablet/tablet.h
@@ -283,7 +283,7 @@ class Tablet {
   Status FlushBiggestDMS();
 
   // Finds the RowSet which has the most separate delta files and
-  // issues a minor delta compaction.
+  // issues a delta compaction.
   Status CompactWorstDeltas(RowSet::DeltaCompactionType type);
 
   // Get the highest performance improvement that would come from compacting the delta stores