You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/04/17 21:10:13 UTC

[kudu] branch master updated (b080766 -> a4f3746)

This is an automated email from the ASF dual-hosted git repository.

adar pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from b080766  bitmap: add copy operation
     new 8b72748  add document for KUDU-2080
     new f2ce76f  [master] remove tserver_service_proto from libmaster
     new b27db95  rowblock: add copying functionality
     new 423814d  SelectionVector: pad extra bits with zeroes in constructor
     new a4f3746  generic_iterators: refactor MergeIterator for whole-block-copy optimization

The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docs/known_issues.adoc               |   2 +
 src/kudu/common/CMakeLists.txt       |   1 +
 src/kudu/common/columnblock-test.cc  | 110 +++++++++++++++
 src/kudu/common/columnblock.cc       |  60 +++++++++
 src/kudu/common/columnblock.h        |  31 ++++-
 src/kudu/common/generic_iterators.cc | 251 ++++++++++++++++++-----------------
 src/kudu/common/rowblock-test.cc     |  20 +++
 src/kudu/common/rowblock.cc          |  25 ++--
 src/kudu/common/rowblock.h           |  78 +++++++++--
 src/kudu/master/CMakeLists.txt       |   3 +-
 10 files changed, 434 insertions(+), 147 deletions(-)
 create mode 100644 src/kudu/common/columnblock.cc


[kudu] 05/05: generic_iterators: refactor MergeIterator for whole-block-copy optimization

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit a4f37461dbb9655f13bbc8447d6c46b2d27e0d03
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Apr 11 20:51:15 2019 -0700

    generic_iterators: refactor MergeIterator for whole-block-copy optimization
    
    This patch refactors MergeIterator::MaterializeBlock in preparation for the
    whole-block-copy optimization. Specifically, the "copy next row" and
    "advance iter and rebalance heaps" operations have been decomposed into
    standalone functions so that it'll be easier to add "copy whole block" as
    an equivalent function in the future.
    
    Also of note: we no longer set the entire client selection vector up front,
    as this won't necessarily be the case when we copy a block.
    
    Change-Id: I050116edc51bb3e91852e6286c221175770e6382
    Reviewed-on: http://gerrit.cloudera.org:8080/13010
    Reviewed-by: Mike Percy <mp...@apache.org>
    Tested-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/common/generic_iterators.cc | 251 ++++++++++++++++++-----------------
 1 file changed, 128 insertions(+), 123 deletions(-)

diff --git a/src/kudu/common/generic_iterators.cc b/src/kudu/common/generic_iterators.cc
index ed501a2..85a463c 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -268,7 +268,7 @@ Status MergeIterState::Advance(bool* pulled_new_block) {
 }
 
 Status MergeIterState::PullNextBlock() {
-  CHECK_EQ(rows_advanced_, rows_valid_)
+  CHECK(IsBlockExhausted())
       << "should not pull next block until current block is exhausted";
 
   if (!read_block_) {
@@ -448,10 +448,19 @@ class MergeIterator : public RowwiseIterator {
   virtual Status NextBlock(RowBlock* dst) OVERRIDE;
 
  private:
-  void PrepareBatch(RowBlock* dst);
-  Status MaterializeBlock(RowBlock* dst);
+  // Finds the next row and materializes it into 'dst' at offset 'dst_row_idx'.
+  //
+  // On success, the selection vector in 'dst' and 'dst_row_idx' are both updated.
+  Status MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx);
+
+  // Calls Init() on all of sub-iterators, wrapping them in predicate evaluating
+  // iterators if necessary and setting up additional per-iterator bookkeeping.
   Status InitSubIterators(ScanSpec *spec);
 
+  // Advances to the next row in 'state', destroying it and/or updating the
+  // three heaps if necessary.
+  Status AdvanceAndReheap(MergeIterState* state);
+
   // Moves sub-iterators from cold_ to hot_ if they now overlap with the merge
   // window. Should be called whenever the merge window moves.
   Status RefillHotHeap();
@@ -635,6 +644,46 @@ Status MergeIterator::InitSubIterators(ScanSpec *spec) {
   return Status::OK();
 }
 
+Status MergeIterator::AdvanceAndReheap(MergeIterState* state) {
+  bool pulled_new_block;
+  RETURN_NOT_OK(state->Advance(&pulled_new_block));
+  hot_.pop();
+
+  // Note that hotmaxes_ is not yet popped as it's not necessary to do so if the
+  // merge window hasn't changed. Thus, we can avoid some work by deferring it
+  // into the cases below.
+
+  if (state->IsFullyExhausted()) {
+    hotmaxes_.pop();
+    DestroySubIterator(state);
+
+    // This sub-iterator's removal means the end of the merge window may have shifted.
+    RETURN_NOT_OK(RefillHotHeap());
+  } else if (pulled_new_block) {
+    hotmaxes_.pop();
+
+    // This sub-iterator has a new block, which means the end of the merge window
+    //may have shifted.
+    if (!hotmaxes_.empty() &&
+        schema_->Compare(hotmaxes_.top(), state->next_row()) < 0) {
+      // The new block lies beyond the new end of the merge window.
+      VLOG(2) << "Block finished, became cold: " << state->ToString();
+      cold_.push(state);
+    } else {
+      // The new block is still within the merge window.
+      VLOG(2) << "Block finished, still hot: " << state->ToString();
+      hot_.push(state);
+      hotmaxes_.push(state->last_row());
+    }
+    RETURN_NOT_OK(RefillHotHeap());
+  } else {
+    // The sub-iterator's block's upper bound remains the same; the merge window
+    // has not changed.
+    hot_.push(state);
+  }
+  return Status::OK();
+}
+
 Status MergeIterator::RefillHotHeap() {
   VLOG(2) << "Refilling hot heap";
   while (!cold_.empty() &&
@@ -691,31 +740,10 @@ Status MergeIterator::NextBlock(RowBlock* dst) {
   CHECK(initted_);
   DCHECK_SCHEMA_EQ(*dst->schema(), schema());
 
-  PrepareBatch(dst);
-  RETURN_NOT_OK(MaterializeBlock(dst));
-
-  return Status::OK();
-}
-
-void MergeIterator::PrepareBatch(RowBlock* dst) {
   if (dst->arena()) {
     dst->arena()->Reset();
   }
-}
 
-// TODO(todd): this is an obvious spot to add codegen - there's a ton of branching
-// and such around the comparisons. A simple experiment indicated there's some
-// 2x to be gained.
-Status MergeIterator::MaterializeBlock(RowBlock *dst) {
-  // We need a vector to track the iterators whose next_row() contains the
-  // smallest row key at a given moment during the merge because there may be
-  // multiple deleted rows with the same row key across multiple rowsets, and
-  // up to one live instance, that we have to deduplicate.
-  vector<MergeIterState*> smallest(hot_.size());
-
-  // Initialize the selection vector.
-  // MergeIterState only returns selected rows.
-  dst->selection_vector()->SetAllTrue();
   size_t dst_row_idx = 0;
   while (dst_row_idx < dst->nrows()) {
     // If the hot heap is empty, we must be out of sub-iterators.
@@ -723,118 +751,95 @@ Status MergeIterator::MaterializeBlock(RowBlock *dst) {
       DCHECK(states_.empty());
       break;
     }
+    // TODO(adar): When hot_.size() == 1, materialize an entire block.
+    RETURN_NOT_OK(MaterializeOneRow(dst, &dst_row_idx));
+  }
+
+  if (dst_row_idx < dst->nrows()) {
+    dst->Resize(dst_row_idx);
+  }
 
-    // TODO(adar): optimize the case where hot_.size == 1.
+  return Status::OK();
+}
 
-    // Find the set of sub-iterators whose matching next row keys are the
-    // smallest across all sub-iterators.
-    //
-    // Note: heap ordered iteration isn't the same as a total ordering. For
-    // example, the two absolute smallest keys might be in the same sub-iterator
-    // rather than in the first two sub-iterators yielded via ordered iteration.
-    // However, the goal here is to identify a group of matching keys for the
-    // purpose of deduplication, and we're guaranteed that such matching keys
-    // cannot exist in the same sub-iterator (i.e. the same rowset).
-    smallest.clear();
-    for (auto iter = hot_.ordered_begin(); iter != hot_.ordered_end(); ++iter) {
-      MergeIterState* state = *iter;
-      if (!smallest.empty() &&
-          schema_->Compare(state->next_row(), smallest[0]->next_row()) != 0) {
-        break;
-      }
-      smallest.emplace_back(state);
+// TODO(todd): this is an obvious spot to add codegen - there's a ton of branching
+// and such around the comparisons. A simple experiment indicated there's some
+// 2x to be gained.
+Status MergeIterator::MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx) {
+  // We need a vector to track the iterators whose next_row() contains the
+  // smallest row key at a given moment during the merge because there may be
+  // multiple deleted rows with the same row key across multiple rowsets, and up
+  // to one live instance, that we have to deduplicate.
+  vector<MergeIterState*> smallest;
+  smallest.reserve(hot_.size());
+
+  // Find the set of sub-iterators whose matching next row keys are the smallest
+  // across all sub-iterators.
+  //
+  // Note: heap ordered iteration isn't the same as a total ordering. For
+  // example, the two absolute smallest keys might be in the same sub-iterator
+  // rather than in the first two sub-iterators yielded via ordered iteration.
+  // However, the goal here is to identify a group of matching keys for the
+  // purpose of deduplication, and we're guaranteed that such matching keys
+  // cannot exist in the same sub-iterator (i.e. the same rowset).
+  for (auto iter = hot_.ordered_begin(); iter != hot_.ordered_end(); ++iter) {
+    MergeIterState* state = *iter;
+    if (!smallest.empty() &&
+        schema_->Compare(state->next_row(), smallest[0]->next_row()) != 0) {
+      break;
     }
+    smallest.emplace_back(state);
+  }
 
-    MergeIterState* row_to_return_iter = nullptr;
-    if (!opts_.include_deleted_rows) {
-      // Since deleted rows are not included here, there can only be a single
-      // instance of any given row key in 'smallest'.
-      CHECK_EQ(1, smallest.size()) << "expected only a single smallest row";
-      row_to_return_iter = smallest[0];
-    } else {
-      // Deduplicate any deleted rows. Row instance de-duplication criteria:
-      // 1. If there is a non-deleted instance, return that instance.
-      // 2. If all rows are deleted, any instance will suffice because we
-      //    don't guarantee that we will return valid field values for deleted
-      //    rows.
-      int live_rows_found = 0;
-      for (const auto& s : smallest) {
-        bool is_deleted =
-            *schema_->ExtractColumnFromRow<IS_DELETED>(s->next_row(), is_deleted_col_index_);
-        if (!is_deleted) {
-          // We found the single live instance of the row.
-          row_to_return_iter = s;
+  MergeIterState* row_to_return_iter = nullptr;
+  if (!opts_.include_deleted_rows) {
+    // Since deleted rows are not included here, there can only be a single
+    // instance of any given row key in 'smallest'.
+    CHECK_EQ(1, smallest.size()) << "expected only a single smallest row";
+    row_to_return_iter = smallest[0];
+  } else {
+    // Deduplicate any deleted rows. Row instance de-duplication criteria:
+    // 1. If there is a non-deleted instance, return that instance.
+    // 2. If all rows are deleted, any instance will suffice because we
+    //    don't guarantee that we will return valid field values for deleted
+    //    rows.
+    int live_rows_found = 0;
+    for (const auto& s : smallest) {
+      bool is_deleted =
+          *schema_->ExtractColumnFromRow<IS_DELETED>(s->next_row(), is_deleted_col_index_);
+      if (!is_deleted) {
+        // We found the single live instance of the row.
+        row_to_return_iter = s;
 #ifndef NDEBUG
-          live_rows_found++; // In DEBUG mode, do a sanity-check count of the live rows.
+        live_rows_found++; // In DEBUG mode, do a sanity-check count of the live rows.
 #else
-          break; // In RELEASE mode, short-circuit the loop.
+        break; // In RELEASE mode, short-circuit the loop.
 #endif
-        }
-      }
-      DCHECK_LE(live_rows_found, 1) << "expected at most one live row";
-
-      // If all instances of a given row are deleted then return an arbitrary
-      // deleted instance.
-      if (row_to_return_iter == nullptr) {
-        row_to_return_iter = smallest[0];
-        DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(row_to_return_iter->next_row(),
-                                                          is_deleted_col_index_))
-            << "expected deleted row";
       }
     }
-    VLOG(3) << Substitute("Copying row $0 from $1",
-                          dst_row_idx, row_to_return_iter->ToString());
-    RowBlockRow dst_row = dst->row(dst_row_idx++);
-    RETURN_NOT_OK(CopyRow(row_to_return_iter->next_row(), &dst_row, dst->arena()));
-
-    // Advance all matching sub-iterators and remove any that are exhausted.
-    for (auto& s : smallest) {
-      bool pulled_new_block;
-      RETURN_NOT_OK(s->Advance(&pulled_new_block));
-      hot_.pop();
-
-      // Note that hotmaxes_ is not yet popped as it's not necessary to do so if
-      // the merge window hasn't changed. Thus, we can avoid some work by
-      // deferring it into the cases below.
-
-      if (s->IsFullyExhausted()) {
-        hotmaxes_.pop();
-        DestroySubIterator(s);
-
-        // This sub-iterator's removal means the end of the merge window may
-        // have shifted.
-        RETURN_NOT_OK(RefillHotHeap());
-      } else if (pulled_new_block) {
-        hotmaxes_.pop();
-
-        // This sub-iterator has a new block, which means the end of the merge
-        // window may have shifted.
-        if (!hotmaxes_.empty() && schema_->Compare(hotmaxes_.top(), s->next_row()) < 0) {
-          // The new block lies beyond the new end of the merge window.
-          VLOG(2) << "Block finished, became cold: " << s->ToString();
-          cold_.push(s);
-        } else {
-          // The new block is still within the merge window.
-          VLOG(2) << "Block finished, still hot: " << s->ToString();
-          hot_.push(s);
-          hotmaxes_.push(s->last_row());
-        }
-        RETURN_NOT_OK(RefillHotHeap());
-      } else {
-        // The sub-iterator's block's upper bound remains the same; the merge
-        // window has not changed.
-        hot_.push(s);
-      }
+    DCHECK_LE(live_rows_found, 1) << "expected at most one live row";
+
+    // If all instances of a given row are deleted then return an arbitrary
+    // deleted instance.
+    if (row_to_return_iter == nullptr) {
+      row_to_return_iter = smallest[0];
+      DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(row_to_return_iter->next_row(),
+                                                        is_deleted_col_index_))
+          << "expected deleted row";
     }
   }
-
-  // The number of rows actually copied to the destination RowBlock may be less
-  // than its original capacity due to deduplication of ghost rows.
-  DCHECK_LE(dst_row_idx, dst->nrows());
-  if (dst_row_idx < dst->nrows()) {
-    dst->Resize(dst_row_idx);
+  VLOG(3) << Substitute("Copying row $0 from $1",
+                        *dst_row_idx, row_to_return_iter->ToString());
+  RowBlockRow dst_row = dst->row(*dst_row_idx);
+  RETURN_NOT_OK(CopyRow(row_to_return_iter->next_row(), &dst_row, dst->arena()));
+
+  // Advance all matching sub-iterators and remove any that are exhausted.
+  for (auto& s : smallest) {
+    RETURN_NOT_OK(AdvanceAndReheap(s));
   }
 
+  dst->selection_vector()->SetRowSelected(*dst_row_idx);
+  (*dst_row_idx)++;
   return Status::OK();
 }
 


[kudu] 01/05: add document for KUDU-2080

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 8b727489eb787f9d0667192bb1160ed64e2aa70b
Author: jhc <ih...@163.com>
AuthorDate: Mon Mar 18 10:50:50 2019 +0800

    add document for KUDU-2080
    
    Change-Id: I7a802a846ad5ec93ce4e0022ec279f1b4c6cc5db
    Reviewed-on: http://gerrit.cloudera.org:8080/12774
    Tested-by: Kudu Jenkins
    Reviewed-by: Grant Henke <gr...@apache.org>
---
 docs/known_issues.adoc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/known_issues.adoc b/docs/known_issues.adoc
index 0c3e9ac..8788585 100644
--- a/docs/known_issues.adoc
+++ b/docs/known_issues.adoc
@@ -114,6 +114,8 @@
   different locations, it is recommended to measure the bandwidth and latency between servers
   to ensure they fit within the above guidelines.
 
+* All masters must be started at the same time when the cluster is started for the very first time.
+
 == Server management
 
 * Production deployments should configure a least 4 GiB of memory for tablet servers,


[kudu] 03/05: rowblock: add copying functionality

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit b27db958c5bd99b778f739171d0bfe4d66ec9715
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Apr 11 20:37:46 2019 -0700

    rowblock: add copying functionality
    
    This patch adds RowBlock::CopyTo, a function that enables copying of row
    data between RowBlocks. It's a building block for the "whole block copy"
    MergeIterator optimization, wherein part of a (or an entire) sub-iterator
    RowBlock is copied to the client's RowBlock.
    
    Change-Id: I735796f11e3a388ffc66e3d92f8c2097cdec3a91
    Reviewed-on: http://gerrit.cloudera.org:8080/13008
    Reviewed-by: Mike Percy <mp...@apache.org>
    Tested-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/common/CMakeLists.txt      |   1 +
 src/kudu/common/columnblock-test.cc | 110 ++++++++++++++++++++++++++++++++++++
 src/kudu/common/columnblock.cc      |  60 ++++++++++++++++++++
 src/kudu/common/columnblock.h       |  31 ++++++++--
 src/kudu/common/rowblock.cc         |  17 ++++--
 src/kudu/common/rowblock.h          |  51 +++++++++++++++++
 6 files changed, 262 insertions(+), 8 deletions(-)

diff --git a/src/kudu/common/CMakeLists.txt b/src/kudu/common/CMakeLists.txt
index fd8d64f..eb6a783 100644
--- a/src/kudu/common/CMakeLists.txt
+++ b/src/kudu/common/CMakeLists.txt
@@ -40,6 +40,7 @@ ADD_EXPORTABLE_LIBRARY(wire_protocol_proto
   NONLINK_DEPS ${WIRE_PROTOCOL_PROTO_TGTS})
 
 set(COMMON_SRCS
+  columnblock.cc
   column_predicate.cc
   encoded_key.cc
   generic_iterators.cc
diff --git a/src/kudu/common/columnblock-test.cc b/src/kudu/common/columnblock-test.cc
index 9ded126..5bed5d4 100644
--- a/src/kudu/common/columnblock-test.cc
+++ b/src/kudu/common/columnblock-test.cc
@@ -17,9 +17,23 @@
 
 #include "kudu/common/columnblock.h"
 
+#include <string>
+
 #include <gtest/gtest.h>
 
 #include "kudu/common/common.pb.h"
+#include "kudu/common/rowblock.h"
+#include "kudu/common/types.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/memory/arena.h"
+#include "kudu/util/test_macros.h"
+
+namespace kudu {
+class Slice;
+} // namespace kudu
+
+using std::string;
+using strings::Substitute;
 
 namespace kudu {
 
@@ -56,4 +70,100 @@ TEST(TestColumnBlock, TestEquals) {
   ASSERT_EQ(scb5, scb6);
 }
 
+TEST(TestColumnBlock, TestCopyTo) {
+  ScopedColumnBlock<UINT32> src(8, /*allow_nulls=*/false);
+  ScopedColumnBlock<UINT32> dst(8, /*allow_nulls=*/false);
+
+  for (int i = 0; i < src.nrows(); i++) {
+    src[i] = i;
+  }
+  for (int i = 0; i < dst.nrows(); i++) {
+    dst[i] = 100;
+  }
+
+  SelectionVector sv(src.nrows());
+  sv.SetAllTrue();
+
+  // src: 0   1   2   3   4   5   6   7
+  // dst: 100 100 100 100 100 100 100 100
+  // ------------------------------------
+  // dst: 100 100 100 100 100 3   4   5
+  ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+  for (int i = 0; i < dst.nrows(); i++) {
+    int expected_val = i < 5 ? 100 : i - 2;
+    ASSERT_EQ(expected_val, dst[i]);
+  }
+}
+
+TEST(TestColumnBlock, TestCopyToIndirectData) {
+  ScopedColumnBlock<STRING> src(8, /*allow_nulls=*/false);
+  ScopedColumnBlock<STRING> dst(8, /*allow_nulls=*/false);
+
+  // Ignore idx 3, and poke a corresponding hole in the selection vector.
+  Slice* next_cell = reinterpret_cast<Slice*>(src.data());
+  for (int i = 0; i < src.nrows(); i++, next_cell++) {
+    if (i == 3) continue;
+    ASSERT_TRUE(src.arena()->RelocateSlice(Substitute("h$0", i), next_cell));
+  }
+  next_cell = reinterpret_cast<Slice*>(dst.data());
+  for (int i = 0; i < dst.nrows(); i++, next_cell++) {
+    ASSERT_TRUE(dst.arena()->RelocateSlice("", next_cell));
+  }
+
+  SelectionVector sv(src.nrows());
+  sv.SetAllTrue();
+  sv.SetRowUnselected(3);
+
+  // src: h0 h1 h2 ?? h4 h5 h6 h7
+  // dst: "" "" "" "" "" "" "" ""
+  // ----------------------------
+  // dst: "" "" "" "" "" "" h4 h5
+  ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+  for (int i = 0; i < dst.nrows(); i++) {
+    string expected_val = i < 6 ? "" : Substitute("h$0", i - 2);
+    ASSERT_EQ(expected_val, dst[i].ToString());
+  }
+}
+
+TEST(TestColumnBlock, TestCopyToNulls) {
+  ScopedColumnBlock<UINT32> src(8);
+  ScopedColumnBlock<UINT32> dst(8);
+
+  // Initialize idx 3 to null in both 'src' and 'dst'.
+  for (int i = 0; i < src.nrows(); i++) {
+    src.SetCellIsNull(i, i == 3);
+    if (i != 3) {
+      src[i] = i;
+    }
+  }
+  for (int i = 0; i < dst.nrows(); i++) {
+    dst.SetCellIsNull(i, i == 3);
+    if (i != 3) {
+      dst[i] = 100;
+    }
+  }
+
+  SelectionVector sv(src.nrows());
+  sv.SetAllTrue();
+
+  // src: 0   1   2   null 4   5    6   7
+  // dst: 100 100 100 null 100 100  100 100
+  // --------------------------------------
+  // dst: 100 100 100 null 100 null 4   5
+  ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+  for (int i = 0; i < dst.nrows(); i++) {
+    SCOPED_TRACE(i);
+    if (i == 3 || i == 5) {
+      ASSERT_TRUE(dst.is_null(i));
+    } else {
+      ASSERT_FALSE(dst.is_null(i));
+      int expected_val = i < 6 ? 100 : i - 2;
+      ASSERT_EQ(expected_val, dst[i]);
+    }
+  }
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/columnblock.cc b/src/kudu/common/columnblock.cc
new file mode 100644
index 0000000..68ce18c
--- /dev/null
+++ b/src/kudu/common/columnblock.cc
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/common/columnblock.h"
+
+#include <cstring>
+
+#include "kudu/common/row.h"
+#include "kudu/common/rowblock.h"
+
+namespace kudu {
+
+Status ColumnBlock::CopyTo(const SelectionVector& sel_vec,
+                           ColumnBlock* dst, size_t src_cell_off,
+                           size_t dst_cell_off, size_t num_cells) const {
+  DCHECK_EQ(type_, dst->type_);
+  DCHECK_EQ(is_nullable(), dst->is_nullable());
+  DCHECK_GE(nrows_, src_cell_off + num_cells);
+  DCHECK_GE(dst->nrows_, dst_cell_off + num_cells);
+
+  // Columns with indirect data need to be copied cell-by-cell in order to
+  // perform arena relocation. Deselected cells must be skipped; the source
+  // content could be garbage so it'd be unsafe to access it as indirect data.
+  if (type_->physical_type() == BINARY) {
+    for (size_t cell_idx = 0; cell_idx < num_cells; cell_idx++) {
+      if (sel_vec.IsRowSelected(src_cell_off + cell_idx)) {
+        Cell s(cell(src_cell_off + cell_idx));
+        Cell d(dst->cell(dst_cell_off + cell_idx));
+        RETURN_NOT_OK(CopyCell(s, &d, dst->arena())); // Also copies nullability.
+      }
+    }
+  } else {
+    memcpy(dst->data_ + (dst_cell_off * type_->size()),
+           data_ + (src_cell_off * type_->size()),
+           num_cells * type_->size());
+    if (null_bitmap_) {
+      BitmapCopy(dst->null_bitmap_, dst_cell_off,
+                 null_bitmap_, src_cell_off,
+                 num_cells);
+  }
+}
+
+  return Status::OK();
+}
+
+} // namespace kudu
diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h
index fe23093..45d0d2a 100644
--- a/src/kudu/common/columnblock.h
+++ b/src/kudu/common/columnblock.h
@@ -14,14 +14,21 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-#ifndef KUDU_COMMON_COLUMNBLOCK_H
-#define KUDU_COMMON_COLUMNBLOCK_H
 
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
 #include <string>
 
-#include "kudu/common/row.h"
+#include <glog/logging.h>
+
+#include "kudu/common/common.pb.h"
 #include "kudu/common/types.h"
 #include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/gutil/strings/fastmem.h"
+#include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/util/bitmap.h"
 #include "kudu/util/memory/arena.h"
 #include "kudu/util/memory/overwrite.h"
@@ -30,6 +37,7 @@
 namespace kudu {
 
 class ColumnBlockCell;
+class SelectionVector;
 
 // A block of data all belonging to a single column.
 // This is simply a view into a buffer - it does not have any associated
@@ -121,6 +129,22 @@ class ColumnBlock {
     return s;
   }
 
+  // Copies a range of cells between two ColumnBlocks.
+  //
+  // The extent of the range is designated by 'src_cell_off' and 'num_cells'. It
+  // is copied to 'dst' at 'dst_cell_off'.
+  //
+  // Note: The inclusion of 'sel_vec' in this function is an admission that
+  // ColumnBlocks are always used via RowBlocks, and a requirement for safe
+  // handling of types with indirect data (i.e. deselected cells are not
+  // relocated because doing so would be unsafe).
+  //
+  // TODO(adar): for columns with indirect data, existing arena allocations
+  // belonging to cells in 'dst' that are overwritten will NOT be deallocated.
+  Status CopyTo(const SelectionVector& sel_vec,
+                ColumnBlock* dst, size_t src_cell_off,
+                size_t dst_cell_off, size_t num_cells) const;
+
  private:
   friend class ColumnBlockCell;
   friend class ColumnDataView;
@@ -295,4 +319,3 @@ class ScopedColumnBlock : public ColumnBlock {
 };
 
 } // namespace kudu
-#endif
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 32cce4b..8c4a160 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -19,6 +19,7 @@
 #include <glog/logging.h>
 
 #include "kudu/gutil/bits.h"
+#include "kudu/gutil/port.h"
 #include "kudu/util/bitmap.h"
 
 namespace kudu {
@@ -32,6 +33,10 @@ SelectionVector::SelectionVector(size_t row_capacity)
 }
 
 void SelectionVector::Resize(size_t n_rows) {
+  if (PREDICT_FALSE(n_rows == n_rows_)) {
+    return;
+  }
+
   size_t new_bytes = BitmapSize(n_rows);
   CHECK_LE(new_bytes, bytes_capacity_);
   n_rows_ = n_rows;
@@ -143,10 +148,14 @@ RowBlock::~RowBlock() {
   }
 }
 
-void RowBlock::Resize(size_t new_size) {
-  CHECK_LE(new_size, row_capacity_);
-  nrows_ = new_size;
-  sel_vec_.Resize(new_size);
+void RowBlock::Resize(size_t n_rows) {
+  if (PREDICT_FALSE(n_rows == nrows_)) {
+    return;
+  }
+
+  CHECK_LE(n_rows, row_capacity_);
+  nrows_ = n_rows;
+  sel_vec_.Resize(n_rows);
 }
 
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index d2cfc82..aee55a1 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -31,6 +31,7 @@
 #include "kudu/gutil/macros.h"
 #include "kudu/gutil/strings/stringpiece.h"
 #include "kudu/util/bitmap.h"
+#include "kudu/util/status.h"
 
 namespace kudu {
 
@@ -128,6 +129,27 @@ class SelectionVector {
 
   size_t nrows() const { return n_rows_; }
 
+  // Copies a range of bits between two SelectionVectors.
+  //
+  // The extent of the range is designated by 'src_row_off' and 'num_rows'. It
+  // is copied to 'dst' at 'dst_row_off'.
+  //
+  // Note: 'dst' will be resized if the copy causes it to grow (though this is
+  // just a "logical" resize; no reallocation takes place).
+  void CopyTo(SelectionVector* dst, size_t src_row_off,
+              size_t dst_row_off, size_t num_rows) const {
+    DCHECK_GE(n_rows_, src_row_off + num_rows);
+
+    size_t new_num_rows = dst_row_off + num_rows;
+    if (new_num_rows > dst->nrows()) {
+      // This will crash if 'dst' lacks adequate capacity.
+      dst->Resize(new_num_rows);
+    }
+
+    BitmapCopy(dst->mutable_bitmap(), dst_row_off,
+               bitmap_.get(), src_row_off, num_rows);
+  }
+
  private:
   // The number of allocated bytes in bitmap_
   size_t bytes_capacity_;
@@ -277,6 +299,35 @@ class RowBlock {
     return &sel_vec_;
   }
 
+  // Copies a range of rows between two RowBlocks.
+  //
+  // The extent of the range is designated by 'src_row_off' and 'num_rows'. It
+  // is copied to 'dst' at 'dst_row_off'.
+  //
+  // Note: 'dst' will be resized if the copy causes it to grow (though this is
+  // just a "logical" resize; no reallocation takes place).
+  Status CopyTo(RowBlock* dst, size_t src_row_off,
+                size_t dst_row_off, size_t num_rows) const {
+    DCHECK_SCHEMA_EQ(*schema_, *dst->schema());
+    DCHECK_GE(nrows_, src_row_off + num_rows);
+
+    size_t new_num_rows = dst_row_off + num_rows;
+    if (new_num_rows > dst->nrows()) {
+      // This will crash if 'dst' lacks adequate capacity.
+      dst->Resize(new_num_rows);
+    }
+
+    for (size_t col_idx = 0; col_idx < schema_->num_columns(); col_idx++) {
+      ColumnBlock src_cb(column_block(col_idx));
+      ColumnBlock dst_cb(dst->column_block(col_idx));
+      RETURN_NOT_OK(src_cb.CopyTo(sel_vec_, &dst_cb,
+                                  src_row_off, dst_row_off, num_rows));
+    }
+
+    sel_vec_.CopyTo(&dst->sel_vec_, src_row_off, dst_row_off, num_rows);
+    return Status::OK();
+  }
+
  private:
   DISALLOW_COPY_AND_ASSIGN(RowBlock);
 


[kudu] 04/05: SelectionVector: pad extra bits with zeroes in constructor

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 423814d2e4eb2281a0ccf5fc4702153a608e542a
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Fri Apr 12 09:56:27 2019 -0700

    SelectionVector: pad extra bits with zeroes in constructor
    
    I found this after removing the MergeIterator's call to SetAllTrue(). It
    turns out that if you don't call any functions that set all bytes en masse,
    CountSelected() and AnySelected() will misbehave as they'll read garbage
    data from any trailing bits beyond the logical end of the bitmap.
    
    We can fix this in one of two ways:
    - Modify CountSelected()/AnySelected() to ignore the trailing bits.
    - Zero out the trailing bits in the constructor.
    
    I opted for the second approach as I found it easier to implement, and I
    suspect it's more performant than the first.
    
    Change-Id: I74baf87c7f38b7eea0ad46825c2421c99f8f42a1
    Reviewed-on: http://gerrit.cloudera.org:8080/13009
    Tested-by: Kudu Jenkins
    Reviewed-by: Mike Percy <mp...@apache.org>
---
 src/kudu/common/rowblock-test.cc | 20 ++++++++++++++++++++
 src/kudu/common/rowblock.cc      |  8 ++------
 src/kudu/common/rowblock.h       | 27 +++++++++++++++++++--------
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/src/kudu/common/rowblock-test.cc b/src/kudu/common/rowblock-test.cc
index 0865289..1dd85b8 100644
--- a/src/kudu/common/rowblock-test.cc
+++ b/src/kudu/common/rowblock-test.cc
@@ -17,6 +17,8 @@
 
 #include "kudu/common/rowblock.h"
 
+#include <cstddef>
+
 #include <gtest/gtest.h>
 
 namespace kudu {
@@ -43,4 +45,22 @@ TEST(TestSelectionVector, TestEquals) {
   ASSERT_NE(sv1, sv3);
 }
 
+// Test that SelectionVector functions that operate on bytes rather
+// than bits work correctly even if we haven't set or unset all bytes en masse.
+TEST(TestSelectionVector, TestNonByteAligned) {
+  SelectionVector sv(3);
+
+  for (size_t i = 0; i < sv.nrows(); i++) {
+    sv.SetRowSelected(i);
+  }
+  ASSERT_EQ(sv.nrows(), sv.CountSelected());
+  ASSERT_TRUE(sv.AnySelected());
+
+  for (size_t i = 0; i < sv.nrows(); i++) {
+    sv.SetRowUnselected(i);
+  }
+  ASSERT_EQ(0, sv.CountSelected());
+  ASSERT_FALSE(sv.AnySelected());
+}
+
 } // namespace kudu
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 8c4a160..4996973 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -30,6 +30,7 @@ SelectionVector::SelectionVector(size_t row_capacity)
     n_bytes_(bytes_capacity_),
     bitmap_(new uint8_t[n_bytes_]) {
   CHECK_GT(n_bytes_, 0);
+  PadExtraBitsWithZeroes();
 }
 
 void SelectionVector::Resize(size_t n_rows) {
@@ -41,12 +42,7 @@ void SelectionVector::Resize(size_t n_rows) {
   CHECK_LE(new_bytes, bytes_capacity_);
   n_rows_ = n_rows;
   n_bytes_ = new_bytes;
-  // Pad with zeroes up to the next byte in order to give CountSelected()
-  // and AnySelected() the assumption that the size is an even byte
-  size_t bits_in_last_byte = n_rows & 7;
-  if (bits_in_last_byte > 0) {
-    BitmapChangeBits(&bitmap_[0], n_rows_, 8 - bits_in_last_byte, 0);
-  }
+  PadExtraBitsWithZeroes();
 }
 
 void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index aee55a1..660c272 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -111,15 +111,8 @@ class SelectionVector {
 
   // Set all bits in the bitmap to 1
   void SetAllTrue() {
-    // Initially all rows should be selected.
     memset(&bitmap_[0], 0xff, n_bytes_);
-    // the last byte in the bitmap may have a few extra bits - need to
-    // clear those
-
-    int trailer_bits = 8 - (n_rows_ % 8);
-    if (trailer_bits != 8) {
-      bitmap_[n_bytes_ - 1] >>= trailer_bits;
-    }
+    PadExtraBitsWithZeroes();
   }
 
   // Set all bits in the bitmap to 0
@@ -151,6 +144,24 @@ class SelectionVector {
   }
 
  private:
+
+  // Pads any non-byte-aligned bits at the end of the SelectionVector with zeroes.
+  //
+  // To improve performance, CountSelected() and AnySelected() evaluate the
+  // SelectionVector's bitmap in terms of bytes. As such, they consider all of
+  // the trailing bits, even if the bitmap's bit length is not byte-aligned and
+  // some trailing bits aren't part of the bitmap.
+  //
+  // To address this without sacrificing performance, we need to zero out all
+  // trailing bits at construction time, or after any operation that sets all
+  // bytes in bulk.
+  void PadExtraBitsWithZeroes() {
+    size_t bits_in_last_byte = n_rows_ & 7;
+    if (bits_in_last_byte > 0) {
+      BitmapChangeBits(&bitmap_[0], n_rows_, 8 - bits_in_last_byte, false);
+    }
+  }
+
   // The number of allocated bytes in bitmap_
   size_t bytes_capacity_;
 


[kudu] 02/05: [master] remove tserver_service_proto from libmaster

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit f2ce76f7ab4d8e2b801daef3895cefdaa4b72909
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Apr 16 22:43:36 2019 -0700

    [master] remove tserver_service_proto from libmaster
    
    Removed tserver_service_proto from target_link_libraries() of the
    master library target.
    
    There are no functional changes in this patch.
    
    Change-Id: Iee425fc7558f48af32338941df7bd34465f313f2
    Reviewed-on: http://gerrit.cloudera.org:8080/13054
    Tested-by: Kudu Jenkins
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
 src/kudu/master/CMakeLists.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/kudu/master/CMakeLists.txt b/src/kudu/master/CMakeLists.txt
index aeaaf44..a8098e8 100644
--- a/src/kudu/master/CMakeLists.txt
+++ b/src/kudu/master/CMakeLists.txt
@@ -69,8 +69,7 @@ target_link_libraries(master
   server_process
   tablet
   token_proto
-  tserver
-  tserver_service_proto)
+  tserver)
 
 # Tests
 SET_KUDU_TEST_LINK_LIBS(