You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/04/17 21:10:13 UTC
[kudu] branch master updated (b080766 -> a4f3746)
This is an automated email from the ASF dual-hosted git repository.
adar pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.
from b080766 bitmap: add copy operation
new 8b72748 add document for KUDU-2080
new f2ce76f [master] remove tserver_service_proto from libmaster
new b27db95 rowblock: add copying functionality
new 423814d SelectionVector: pad extra bits with zeroes in constructor
new a4f3746 generic_iterators: refactor MergeIterator for whole-block-copy optimization
The 5 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
docs/known_issues.adoc | 2 +
src/kudu/common/CMakeLists.txt | 1 +
src/kudu/common/columnblock-test.cc | 110 +++++++++++++++
src/kudu/common/columnblock.cc | 60 +++++++++
src/kudu/common/columnblock.h | 31 ++++-
src/kudu/common/generic_iterators.cc | 251 ++++++++++++++++++-----------------
src/kudu/common/rowblock-test.cc | 20 +++
src/kudu/common/rowblock.cc | 25 ++--
src/kudu/common/rowblock.h | 78 +++++++++--
src/kudu/master/CMakeLists.txt | 3 +-
10 files changed, 434 insertions(+), 147 deletions(-)
create mode 100644 src/kudu/common/columnblock.cc
[kudu] 05/05: generic_iterators: refactor MergeIterator for
whole-block-copy optimization
Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit a4f37461dbb9655f13bbc8447d6c46b2d27e0d03
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Apr 11 20:51:15 2019 -0700
generic_iterators: refactor MergeIterator for whole-block-copy optimization
This patch refactors MergeIterator::MaterializeBlock in preparation for the
whole-block-copy optimization. Specifically, the "copy next row" and
"advance iter and rebalance heaps" operations have been decomposed into
standalone functions so that it'll be easier to add "copy whole block" as
an equivalent function in the future.
Also of note: we no longer set the entire client selection vector up front,
as this won't necessarily be the case when we copy a block.
Change-Id: I050116edc51bb3e91852e6286c221175770e6382
Reviewed-on: http://gerrit.cloudera.org:8080/13010
Reviewed-by: Mike Percy <mp...@apache.org>
Tested-by: Adar Dembo <ad...@cloudera.com>
---
src/kudu/common/generic_iterators.cc | 251 ++++++++++++++++++-----------------
1 file changed, 128 insertions(+), 123 deletions(-)
diff --git a/src/kudu/common/generic_iterators.cc b/src/kudu/common/generic_iterators.cc
index ed501a2..85a463c 100644
--- a/src/kudu/common/generic_iterators.cc
+++ b/src/kudu/common/generic_iterators.cc
@@ -268,7 +268,7 @@ Status MergeIterState::Advance(bool* pulled_new_block) {
}
Status MergeIterState::PullNextBlock() {
- CHECK_EQ(rows_advanced_, rows_valid_)
+ CHECK(IsBlockExhausted())
<< "should not pull next block until current block is exhausted";
if (!read_block_) {
@@ -448,10 +448,19 @@ class MergeIterator : public RowwiseIterator {
virtual Status NextBlock(RowBlock* dst) OVERRIDE;
private:
- void PrepareBatch(RowBlock* dst);
- Status MaterializeBlock(RowBlock* dst);
+ // Finds the next row and materializes it into 'dst' at offset 'dst_row_idx'.
+ //
+ // On success, the selection vector in 'dst' and 'dst_row_idx' are both updated.
+ Status MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx);
+
+ // Calls Init() on all of sub-iterators, wrapping them in predicate evaluating
+ // iterators if necessary and setting up additional per-iterator bookkeeping.
Status InitSubIterators(ScanSpec *spec);
+ // Advances to the next row in 'state', destroying it and/or updating the
+ // three heaps if necessary.
+ Status AdvanceAndReheap(MergeIterState* state);
+
// Moves sub-iterators from cold_ to hot_ if they now overlap with the merge
// window. Should be called whenever the merge window moves.
Status RefillHotHeap();
@@ -635,6 +644,46 @@ Status MergeIterator::InitSubIterators(ScanSpec *spec) {
return Status::OK();
}
+Status MergeIterator::AdvanceAndReheap(MergeIterState* state) {
+ bool pulled_new_block;
+ RETURN_NOT_OK(state->Advance(&pulled_new_block));
+ hot_.pop();
+
+ // Note that hotmaxes_ is not yet popped as it's not necessary to do so if the
+ // merge window hasn't changed. Thus, we can avoid some work by deferring it
+ // into the cases below.
+
+ if (state->IsFullyExhausted()) {
+ hotmaxes_.pop();
+ DestroySubIterator(state);
+
+ // This sub-iterator's removal means the end of the merge window may have shifted.
+ RETURN_NOT_OK(RefillHotHeap());
+ } else if (pulled_new_block) {
+ hotmaxes_.pop();
+
+ // This sub-iterator has a new block, which means the end of the merge window
+ //may have shifted.
+ if (!hotmaxes_.empty() &&
+ schema_->Compare(hotmaxes_.top(), state->next_row()) < 0) {
+ // The new block lies beyond the new end of the merge window.
+ VLOG(2) << "Block finished, became cold: " << state->ToString();
+ cold_.push(state);
+ } else {
+ // The new block is still within the merge window.
+ VLOG(2) << "Block finished, still hot: " << state->ToString();
+ hot_.push(state);
+ hotmaxes_.push(state->last_row());
+ }
+ RETURN_NOT_OK(RefillHotHeap());
+ } else {
+ // The sub-iterator's block's upper bound remains the same; the merge window
+ // has not changed.
+ hot_.push(state);
+ }
+ return Status::OK();
+}
+
Status MergeIterator::RefillHotHeap() {
VLOG(2) << "Refilling hot heap";
while (!cold_.empty() &&
@@ -691,31 +740,10 @@ Status MergeIterator::NextBlock(RowBlock* dst) {
CHECK(initted_);
DCHECK_SCHEMA_EQ(*dst->schema(), schema());
- PrepareBatch(dst);
- RETURN_NOT_OK(MaterializeBlock(dst));
-
- return Status::OK();
-}
-
-void MergeIterator::PrepareBatch(RowBlock* dst) {
if (dst->arena()) {
dst->arena()->Reset();
}
-}
-// TODO(todd): this is an obvious spot to add codegen - there's a ton of branching
-// and such around the comparisons. A simple experiment indicated there's some
-// 2x to be gained.
-Status MergeIterator::MaterializeBlock(RowBlock *dst) {
- // We need a vector to track the iterators whose next_row() contains the
- // smallest row key at a given moment during the merge because there may be
- // multiple deleted rows with the same row key across multiple rowsets, and
- // up to one live instance, that we have to deduplicate.
- vector<MergeIterState*> smallest(hot_.size());
-
- // Initialize the selection vector.
- // MergeIterState only returns selected rows.
- dst->selection_vector()->SetAllTrue();
size_t dst_row_idx = 0;
while (dst_row_idx < dst->nrows()) {
// If the hot heap is empty, we must be out of sub-iterators.
@@ -723,118 +751,95 @@ Status MergeIterator::MaterializeBlock(RowBlock *dst) {
DCHECK(states_.empty());
break;
}
+ // TODO(adar): When hot_.size() == 1, materialize an entire block.
+ RETURN_NOT_OK(MaterializeOneRow(dst, &dst_row_idx));
+ }
+
+ if (dst_row_idx < dst->nrows()) {
+ dst->Resize(dst_row_idx);
+ }
- // TODO(adar): optimize the case where hot_.size == 1.
+ return Status::OK();
+}
- // Find the set of sub-iterators whose matching next row keys are the
- // smallest across all sub-iterators.
- //
- // Note: heap ordered iteration isn't the same as a total ordering. For
- // example, the two absolute smallest keys might be in the same sub-iterator
- // rather than in the first two sub-iterators yielded via ordered iteration.
- // However, the goal here is to identify a group of matching keys for the
- // purpose of deduplication, and we're guaranteed that such matching keys
- // cannot exist in the same sub-iterator (i.e. the same rowset).
- smallest.clear();
- for (auto iter = hot_.ordered_begin(); iter != hot_.ordered_end(); ++iter) {
- MergeIterState* state = *iter;
- if (!smallest.empty() &&
- schema_->Compare(state->next_row(), smallest[0]->next_row()) != 0) {
- break;
- }
- smallest.emplace_back(state);
+// TODO(todd): this is an obvious spot to add codegen - there's a ton of branching
+// and such around the comparisons. A simple experiment indicated there's some
+// 2x to be gained.
+Status MergeIterator::MaterializeOneRow(RowBlock* dst, size_t* dst_row_idx) {
+ // We need a vector to track the iterators whose next_row() contains the
+ // smallest row key at a given moment during the merge because there may be
+ // multiple deleted rows with the same row key across multiple rowsets, and up
+ // to one live instance, that we have to deduplicate.
+ vector<MergeIterState*> smallest;
+ smallest.reserve(hot_.size());
+
+ // Find the set of sub-iterators whose matching next row keys are the smallest
+ // across all sub-iterators.
+ //
+ // Note: heap ordered iteration isn't the same as a total ordering. For
+ // example, the two absolute smallest keys might be in the same sub-iterator
+ // rather than in the first two sub-iterators yielded via ordered iteration.
+ // However, the goal here is to identify a group of matching keys for the
+ // purpose of deduplication, and we're guaranteed that such matching keys
+ // cannot exist in the same sub-iterator (i.e. the same rowset).
+ for (auto iter = hot_.ordered_begin(); iter != hot_.ordered_end(); ++iter) {
+ MergeIterState* state = *iter;
+ if (!smallest.empty() &&
+ schema_->Compare(state->next_row(), smallest[0]->next_row()) != 0) {
+ break;
}
+ smallest.emplace_back(state);
+ }
- MergeIterState* row_to_return_iter = nullptr;
- if (!opts_.include_deleted_rows) {
- // Since deleted rows are not included here, there can only be a single
- // instance of any given row key in 'smallest'.
- CHECK_EQ(1, smallest.size()) << "expected only a single smallest row";
- row_to_return_iter = smallest[0];
- } else {
- // Deduplicate any deleted rows. Row instance de-duplication criteria:
- // 1. If there is a non-deleted instance, return that instance.
- // 2. If all rows are deleted, any instance will suffice because we
- // don't guarantee that we will return valid field values for deleted
- // rows.
- int live_rows_found = 0;
- for (const auto& s : smallest) {
- bool is_deleted =
- *schema_->ExtractColumnFromRow<IS_DELETED>(s->next_row(), is_deleted_col_index_);
- if (!is_deleted) {
- // We found the single live instance of the row.
- row_to_return_iter = s;
+ MergeIterState* row_to_return_iter = nullptr;
+ if (!opts_.include_deleted_rows) {
+ // Since deleted rows are not included here, there can only be a single
+ // instance of any given row key in 'smallest'.
+ CHECK_EQ(1, smallest.size()) << "expected only a single smallest row";
+ row_to_return_iter = smallest[0];
+ } else {
+ // Deduplicate any deleted rows. Row instance de-duplication criteria:
+ // 1. If there is a non-deleted instance, return that instance.
+ // 2. If all rows are deleted, any instance will suffice because we
+ // don't guarantee that we will return valid field values for deleted
+ // rows.
+ int live_rows_found = 0;
+ for (const auto& s : smallest) {
+ bool is_deleted =
+ *schema_->ExtractColumnFromRow<IS_DELETED>(s->next_row(), is_deleted_col_index_);
+ if (!is_deleted) {
+ // We found the single live instance of the row.
+ row_to_return_iter = s;
#ifndef NDEBUG
- live_rows_found++; // In DEBUG mode, do a sanity-check count of the live rows.
+ live_rows_found++; // In DEBUG mode, do a sanity-check count of the live rows.
#else
- break; // In RELEASE mode, short-circuit the loop.
+ break; // In RELEASE mode, short-circuit the loop.
#endif
- }
- }
- DCHECK_LE(live_rows_found, 1) << "expected at most one live row";
-
- // If all instances of a given row are deleted then return an arbitrary
- // deleted instance.
- if (row_to_return_iter == nullptr) {
- row_to_return_iter = smallest[0];
- DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(row_to_return_iter->next_row(),
- is_deleted_col_index_))
- << "expected deleted row";
}
}
- VLOG(3) << Substitute("Copying row $0 from $1",
- dst_row_idx, row_to_return_iter->ToString());
- RowBlockRow dst_row = dst->row(dst_row_idx++);
- RETURN_NOT_OK(CopyRow(row_to_return_iter->next_row(), &dst_row, dst->arena()));
-
- // Advance all matching sub-iterators and remove any that are exhausted.
- for (auto& s : smallest) {
- bool pulled_new_block;
- RETURN_NOT_OK(s->Advance(&pulled_new_block));
- hot_.pop();
-
- // Note that hotmaxes_ is not yet popped as it's not necessary to do so if
- // the merge window hasn't changed. Thus, we can avoid some work by
- // deferring it into the cases below.
-
- if (s->IsFullyExhausted()) {
- hotmaxes_.pop();
- DestroySubIterator(s);
-
- // This sub-iterator's removal means the end of the merge window may
- // have shifted.
- RETURN_NOT_OK(RefillHotHeap());
- } else if (pulled_new_block) {
- hotmaxes_.pop();
-
- // This sub-iterator has a new block, which means the end of the merge
- // window may have shifted.
- if (!hotmaxes_.empty() && schema_->Compare(hotmaxes_.top(), s->next_row()) < 0) {
- // The new block lies beyond the new end of the merge window.
- VLOG(2) << "Block finished, became cold: " << s->ToString();
- cold_.push(s);
- } else {
- // The new block is still within the merge window.
- VLOG(2) << "Block finished, still hot: " << s->ToString();
- hot_.push(s);
- hotmaxes_.push(s->last_row());
- }
- RETURN_NOT_OK(RefillHotHeap());
- } else {
- // The sub-iterator's block's upper bound remains the same; the merge
- // window has not changed.
- hot_.push(s);
- }
+ DCHECK_LE(live_rows_found, 1) << "expected at most one live row";
+
+ // If all instances of a given row are deleted then return an arbitrary
+ // deleted instance.
+ if (row_to_return_iter == nullptr) {
+ row_to_return_iter = smallest[0];
+ DCHECK(*schema_->ExtractColumnFromRow<IS_DELETED>(row_to_return_iter->next_row(),
+ is_deleted_col_index_))
+ << "expected deleted row";
}
}
-
- // The number of rows actually copied to the destination RowBlock may be less
- // than its original capacity due to deduplication of ghost rows.
- DCHECK_LE(dst_row_idx, dst->nrows());
- if (dst_row_idx < dst->nrows()) {
- dst->Resize(dst_row_idx);
+ VLOG(3) << Substitute("Copying row $0 from $1",
+ *dst_row_idx, row_to_return_iter->ToString());
+ RowBlockRow dst_row = dst->row(*dst_row_idx);
+ RETURN_NOT_OK(CopyRow(row_to_return_iter->next_row(), &dst_row, dst->arena()));
+
+ // Advance all matching sub-iterators and remove any that are exhausted.
+ for (auto& s : smallest) {
+ RETURN_NOT_OK(AdvanceAndReheap(s));
}
+ dst->selection_vector()->SetRowSelected(*dst_row_idx);
+ (*dst_row_idx)++;
return Status::OK();
}
[kudu] 01/05: add document for KUDU-2080
Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 8b727489eb787f9d0667192bb1160ed64e2aa70b
Author: jhc <ih...@163.com>
AuthorDate: Mon Mar 18 10:50:50 2019 +0800
add document for KUDU-2080
Change-Id: I7a802a846ad5ec93ce4e0022ec279f1b4c6cc5db
Reviewed-on: http://gerrit.cloudera.org:8080/12774
Tested-by: Kudu Jenkins
Reviewed-by: Grant Henke <gr...@apache.org>
---
docs/known_issues.adoc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/docs/known_issues.adoc b/docs/known_issues.adoc
index 0c3e9ac..8788585 100644
--- a/docs/known_issues.adoc
+++ b/docs/known_issues.adoc
@@ -114,6 +114,8 @@
different locations, it is recommended to measure the bandwidth and latency between servers
to ensure they fit within the above guidelines.
+* All masters must be started at the same time when the cluster is started for the very first time.
+
== Server management
* Production deployments should configure a least 4 GiB of memory for tablet servers,
[kudu] 03/05: rowblock: add copying functionality
Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit b27db958c5bd99b778f739171d0bfe4d66ec9715
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Apr 11 20:37:46 2019 -0700
rowblock: add copying functionality
This patch adds RowBlock::CopyTo, a function that enables copying of row
data between RowBlocks. It's a building block for the "whole block copy"
MergeIterator optimization, wherein part of a (or an entire) sub-iterator
RowBlock is copied to the client's RowBlock.
Change-Id: I735796f11e3a388ffc66e3d92f8c2097cdec3a91
Reviewed-on: http://gerrit.cloudera.org:8080/13008
Reviewed-by: Mike Percy <mp...@apache.org>
Tested-by: Adar Dembo <ad...@cloudera.com>
---
src/kudu/common/CMakeLists.txt | 1 +
src/kudu/common/columnblock-test.cc | 110 ++++++++++++++++++++++++++++++++++++
src/kudu/common/columnblock.cc | 60 ++++++++++++++++++++
src/kudu/common/columnblock.h | 31 ++++++++--
src/kudu/common/rowblock.cc | 17 ++++--
src/kudu/common/rowblock.h | 51 +++++++++++++++++
6 files changed, 262 insertions(+), 8 deletions(-)
diff --git a/src/kudu/common/CMakeLists.txt b/src/kudu/common/CMakeLists.txt
index fd8d64f..eb6a783 100644
--- a/src/kudu/common/CMakeLists.txt
+++ b/src/kudu/common/CMakeLists.txt
@@ -40,6 +40,7 @@ ADD_EXPORTABLE_LIBRARY(wire_protocol_proto
NONLINK_DEPS ${WIRE_PROTOCOL_PROTO_TGTS})
set(COMMON_SRCS
+ columnblock.cc
column_predicate.cc
encoded_key.cc
generic_iterators.cc
diff --git a/src/kudu/common/columnblock-test.cc b/src/kudu/common/columnblock-test.cc
index 9ded126..5bed5d4 100644
--- a/src/kudu/common/columnblock-test.cc
+++ b/src/kudu/common/columnblock-test.cc
@@ -17,9 +17,23 @@
#include "kudu/common/columnblock.h"
+#include <string>
+
#include <gtest/gtest.h>
#include "kudu/common/common.pb.h"
+#include "kudu/common/rowblock.h"
+#include "kudu/common/types.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/memory/arena.h"
+#include "kudu/util/test_macros.h"
+
+namespace kudu {
+class Slice;
+} // namespace kudu
+
+using std::string;
+using strings::Substitute;
namespace kudu {
@@ -56,4 +70,100 @@ TEST(TestColumnBlock, TestEquals) {
ASSERT_EQ(scb5, scb6);
}
+TEST(TestColumnBlock, TestCopyTo) {
+ ScopedColumnBlock<UINT32> src(8, /*allow_nulls=*/false);
+ ScopedColumnBlock<UINT32> dst(8, /*allow_nulls=*/false);
+
+ for (int i = 0; i < src.nrows(); i++) {
+ src[i] = i;
+ }
+ for (int i = 0; i < dst.nrows(); i++) {
+ dst[i] = 100;
+ }
+
+ SelectionVector sv(src.nrows());
+ sv.SetAllTrue();
+
+ // src: 0 1 2 3 4 5 6 7
+ // dst: 100 100 100 100 100 100 100 100
+ // ------------------------------------
+ // dst: 100 100 100 100 100 3 4 5
+ ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+ for (int i = 0; i < dst.nrows(); i++) {
+ int expected_val = i < 5 ? 100 : i - 2;
+ ASSERT_EQ(expected_val, dst[i]);
+ }
+}
+
+TEST(TestColumnBlock, TestCopyToIndirectData) {
+ ScopedColumnBlock<STRING> src(8, /*allow_nulls=*/false);
+ ScopedColumnBlock<STRING> dst(8, /*allow_nulls=*/false);
+
+ // Ignore idx 3, and poke a corresponding hole in the selection vector.
+ Slice* next_cell = reinterpret_cast<Slice*>(src.data());
+ for (int i = 0; i < src.nrows(); i++, next_cell++) {
+ if (i == 3) continue;
+ ASSERT_TRUE(src.arena()->RelocateSlice(Substitute("h$0", i), next_cell));
+ }
+ next_cell = reinterpret_cast<Slice*>(dst.data());
+ for (int i = 0; i < dst.nrows(); i++, next_cell++) {
+ ASSERT_TRUE(dst.arena()->RelocateSlice("", next_cell));
+ }
+
+ SelectionVector sv(src.nrows());
+ sv.SetAllTrue();
+ sv.SetRowUnselected(3);
+
+ // src: h0 h1 h2 ?? h4 h5 h6 h7
+ // dst: "" "" "" "" "" "" "" ""
+ // ----------------------------
+ // dst: "" "" "" "" "" "" h4 h5
+ ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+ for (int i = 0; i < dst.nrows(); i++) {
+ string expected_val = i < 6 ? "" : Substitute("h$0", i - 2);
+ ASSERT_EQ(expected_val, dst[i].ToString());
+ }
+}
+
+TEST(TestColumnBlock, TestCopyToNulls) {
+ ScopedColumnBlock<UINT32> src(8);
+ ScopedColumnBlock<UINT32> dst(8);
+
+ // Initialize idx 3 to null in both 'src' and 'dst'.
+ for (int i = 0; i < src.nrows(); i++) {
+ src.SetCellIsNull(i, i == 3);
+ if (i != 3) {
+ src[i] = i;
+ }
+ }
+ for (int i = 0; i < dst.nrows(); i++) {
+ dst.SetCellIsNull(i, i == 3);
+ if (i != 3) {
+ dst[i] = 100;
+ }
+ }
+
+ SelectionVector sv(src.nrows());
+ sv.SetAllTrue();
+
+ // src: 0 1 2 null 4 5 6 7
+ // dst: 100 100 100 null 100 100 100 100
+ // --------------------------------------
+ // dst: 100 100 100 null 100 null 4 5
+ ASSERT_OK(src.CopyTo(sv, &dst, 3, 5, 3));
+
+ for (int i = 0; i < dst.nrows(); i++) {
+ SCOPED_TRACE(i);
+ if (i == 3 || i == 5) {
+ ASSERT_TRUE(dst.is_null(i));
+ } else {
+ ASSERT_FALSE(dst.is_null(i));
+ int expected_val = i < 6 ? 100 : i - 2;
+ ASSERT_EQ(expected_val, dst[i]);
+ }
+ }
+}
+
} // namespace kudu
diff --git a/src/kudu/common/columnblock.cc b/src/kudu/common/columnblock.cc
new file mode 100644
index 0000000..68ce18c
--- /dev/null
+++ b/src/kudu/common/columnblock.cc
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/common/columnblock.h"
+
+#include <cstring>
+
+#include "kudu/common/row.h"
+#include "kudu/common/rowblock.h"
+
+namespace kudu {
+
+Status ColumnBlock::CopyTo(const SelectionVector& sel_vec,
+ ColumnBlock* dst, size_t src_cell_off,
+ size_t dst_cell_off, size_t num_cells) const {
+ DCHECK_EQ(type_, dst->type_);
+ DCHECK_EQ(is_nullable(), dst->is_nullable());
+ DCHECK_GE(nrows_, src_cell_off + num_cells);
+ DCHECK_GE(dst->nrows_, dst_cell_off + num_cells);
+
+ // Columns with indirect data need to be copied cell-by-cell in order to
+ // perform arena relocation. Deselected cells must be skipped; the source
+ // content could be garbage so it'd be unsafe to access it as indirect data.
+ if (type_->physical_type() == BINARY) {
+ for (size_t cell_idx = 0; cell_idx < num_cells; cell_idx++) {
+ if (sel_vec.IsRowSelected(src_cell_off + cell_idx)) {
+ Cell s(cell(src_cell_off + cell_idx));
+ Cell d(dst->cell(dst_cell_off + cell_idx));
+ RETURN_NOT_OK(CopyCell(s, &d, dst->arena())); // Also copies nullability.
+ }
+ }
+ } else {
+ memcpy(dst->data_ + (dst_cell_off * type_->size()),
+ data_ + (src_cell_off * type_->size()),
+ num_cells * type_->size());
+ if (null_bitmap_) {
+ BitmapCopy(dst->null_bitmap_, dst_cell_off,
+ null_bitmap_, src_cell_off,
+ num_cells);
+ }
+}
+
+ return Status::OK();
+}
+
+} // namespace kudu
diff --git a/src/kudu/common/columnblock.h b/src/kudu/common/columnblock.h
index fe23093..45d0d2a 100644
--- a/src/kudu/common/columnblock.h
+++ b/src/kudu/common/columnblock.h
@@ -14,14 +14,21 @@
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
-#ifndef KUDU_COMMON_COLUMNBLOCK_H
-#define KUDU_COMMON_COLUMNBLOCK_H
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <ostream>
#include <string>
-#include "kudu/common/row.h"
+#include <glog/logging.h>
+
+#include "kudu/common/common.pb.h"
#include "kudu/common/types.h"
#include "kudu/gutil/gscoped_ptr.h"
+#include "kudu/gutil/strings/fastmem.h"
+#include "kudu/gutil/strings/stringpiece.h"
#include "kudu/util/bitmap.h"
#include "kudu/util/memory/arena.h"
#include "kudu/util/memory/overwrite.h"
@@ -30,6 +37,7 @@
namespace kudu {
class ColumnBlockCell;
+class SelectionVector;
// A block of data all belonging to a single column.
// This is simply a view into a buffer - it does not have any associated
@@ -121,6 +129,22 @@ class ColumnBlock {
return s;
}
+ // Copies a range of cells between two ColumnBlocks.
+ //
+ // The extent of the range is designated by 'src_cell_off' and 'num_cells'. It
+ // is copied to 'dst' at 'dst_cell_off'.
+ //
+ // Note: The inclusion of 'sel_vec' in this function is an admission that
+ // ColumnBlocks are always used via RowBlocks, and a requirement for safe
+ // handling of types with indirect data (i.e. deselected cells are not
+ // relocated because doing so would be unsafe).
+ //
+ // TODO(adar): for columns with indirect data, existing arena allocations
+ // belonging to cells in 'dst' that are overwritten will NOT be deallocated.
+ Status CopyTo(const SelectionVector& sel_vec,
+ ColumnBlock* dst, size_t src_cell_off,
+ size_t dst_cell_off, size_t num_cells) const;
+
private:
friend class ColumnBlockCell;
friend class ColumnDataView;
@@ -295,4 +319,3 @@ class ScopedColumnBlock : public ColumnBlock {
};
} // namespace kudu
-#endif
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 32cce4b..8c4a160 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -19,6 +19,7 @@
#include <glog/logging.h>
#include "kudu/gutil/bits.h"
+#include "kudu/gutil/port.h"
#include "kudu/util/bitmap.h"
namespace kudu {
@@ -32,6 +33,10 @@ SelectionVector::SelectionVector(size_t row_capacity)
}
void SelectionVector::Resize(size_t n_rows) {
+ if (PREDICT_FALSE(n_rows == n_rows_)) {
+ return;
+ }
+
size_t new_bytes = BitmapSize(n_rows);
CHECK_LE(new_bytes, bytes_capacity_);
n_rows_ = n_rows;
@@ -143,10 +148,14 @@ RowBlock::~RowBlock() {
}
}
-void RowBlock::Resize(size_t new_size) {
- CHECK_LE(new_size, row_capacity_);
- nrows_ = new_size;
- sel_vec_.Resize(new_size);
+void RowBlock::Resize(size_t n_rows) {
+ if (PREDICT_FALSE(n_rows == nrows_)) {
+ return;
+ }
+
+ CHECK_LE(n_rows, row_capacity_);
+ nrows_ = n_rows;
+ sel_vec_.Resize(n_rows);
}
} // namespace kudu
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index d2cfc82..aee55a1 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -31,6 +31,7 @@
#include "kudu/gutil/macros.h"
#include "kudu/gutil/strings/stringpiece.h"
#include "kudu/util/bitmap.h"
+#include "kudu/util/status.h"
namespace kudu {
@@ -128,6 +129,27 @@ class SelectionVector {
size_t nrows() const { return n_rows_; }
+ // Copies a range of bits between two SelectionVectors.
+ //
+ // The extent of the range is designated by 'src_row_off' and 'num_rows'. It
+ // is copied to 'dst' at 'dst_row_off'.
+ //
+ // Note: 'dst' will be resized if the copy causes it to grow (though this is
+ // just a "logical" resize; no reallocation takes place).
+ void CopyTo(SelectionVector* dst, size_t src_row_off,
+ size_t dst_row_off, size_t num_rows) const {
+ DCHECK_GE(n_rows_, src_row_off + num_rows);
+
+ size_t new_num_rows = dst_row_off + num_rows;
+ if (new_num_rows > dst->nrows()) {
+ // This will crash if 'dst' lacks adequate capacity.
+ dst->Resize(new_num_rows);
+ }
+
+ BitmapCopy(dst->mutable_bitmap(), dst_row_off,
+ bitmap_.get(), src_row_off, num_rows);
+ }
+
private:
// The number of allocated bytes in bitmap_
size_t bytes_capacity_;
@@ -277,6 +299,35 @@ class RowBlock {
return &sel_vec_;
}
+ // Copies a range of rows between two RowBlocks.
+ //
+ // The extent of the range is designated by 'src_row_off' and 'num_rows'. It
+ // is copied to 'dst' at 'dst_row_off'.
+ //
+ // Note: 'dst' will be resized if the copy causes it to grow (though this is
+ // just a "logical" resize; no reallocation takes place).
+ Status CopyTo(RowBlock* dst, size_t src_row_off,
+ size_t dst_row_off, size_t num_rows) const {
+ DCHECK_SCHEMA_EQ(*schema_, *dst->schema());
+ DCHECK_GE(nrows_, src_row_off + num_rows);
+
+ size_t new_num_rows = dst_row_off + num_rows;
+ if (new_num_rows > dst->nrows()) {
+ // This will crash if 'dst' lacks adequate capacity.
+ dst->Resize(new_num_rows);
+ }
+
+ for (size_t col_idx = 0; col_idx < schema_->num_columns(); col_idx++) {
+ ColumnBlock src_cb(column_block(col_idx));
+ ColumnBlock dst_cb(dst->column_block(col_idx));
+ RETURN_NOT_OK(src_cb.CopyTo(sel_vec_, &dst_cb,
+ src_row_off, dst_row_off, num_rows));
+ }
+
+ sel_vec_.CopyTo(&dst->sel_vec_, src_row_off, dst_row_off, num_rows);
+ return Status::OK();
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(RowBlock);
[kudu] 04/05: SelectionVector: pad extra bits with zeroes in
constructor
Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 423814d2e4eb2281a0ccf5fc4702153a608e542a
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Fri Apr 12 09:56:27 2019 -0700
SelectionVector: pad extra bits with zeroes in constructor
I found this after removing the MergeIterator's call to SetAllTrue(). It
turns out that if you don't call any functions that set all bytes en masse,
CountSelected() and AnySelected() will misbehave as they'll read garbage
data from any trailing bits beyond the logical end of the bitmap.
We can fix this in one of two ways:
- Modify CountSelected()/AnySelected() to ignore the trailing bits.
- Zero out the trailing bits in the constructor.
I opted for the second approach as I found it easier to implement, and I
suspect it's more performant than the first.
Change-Id: I74baf87c7f38b7eea0ad46825c2421c99f8f42a1
Reviewed-on: http://gerrit.cloudera.org:8080/13009
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>
---
src/kudu/common/rowblock-test.cc | 20 ++++++++++++++++++++
src/kudu/common/rowblock.cc | 8 ++------
src/kudu/common/rowblock.h | 27 +++++++++++++++++++--------
3 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/src/kudu/common/rowblock-test.cc b/src/kudu/common/rowblock-test.cc
index 0865289..1dd85b8 100644
--- a/src/kudu/common/rowblock-test.cc
+++ b/src/kudu/common/rowblock-test.cc
@@ -17,6 +17,8 @@
#include "kudu/common/rowblock.h"
+#include <cstddef>
+
#include <gtest/gtest.h>
namespace kudu {
@@ -43,4 +45,22 @@ TEST(TestSelectionVector, TestEquals) {
ASSERT_NE(sv1, sv3);
}
+// Test that SelectionVector functions that operate on bytes rather
+// than bits work correctly even if we haven't set or unset all bytes en masse.
+TEST(TestSelectionVector, TestNonByteAligned) {
+ SelectionVector sv(3);
+
+ for (size_t i = 0; i < sv.nrows(); i++) {
+ sv.SetRowSelected(i);
+ }
+ ASSERT_EQ(sv.nrows(), sv.CountSelected());
+ ASSERT_TRUE(sv.AnySelected());
+
+ for (size_t i = 0; i < sv.nrows(); i++) {
+ sv.SetRowUnselected(i);
+ }
+ ASSERT_EQ(0, sv.CountSelected());
+ ASSERT_FALSE(sv.AnySelected());
+}
+
} // namespace kudu
diff --git a/src/kudu/common/rowblock.cc b/src/kudu/common/rowblock.cc
index 8c4a160..4996973 100644
--- a/src/kudu/common/rowblock.cc
+++ b/src/kudu/common/rowblock.cc
@@ -30,6 +30,7 @@ SelectionVector::SelectionVector(size_t row_capacity)
n_bytes_(bytes_capacity_),
bitmap_(new uint8_t[n_bytes_]) {
CHECK_GT(n_bytes_, 0);
+ PadExtraBitsWithZeroes();
}
void SelectionVector::Resize(size_t n_rows) {
@@ -41,12 +42,7 @@ void SelectionVector::Resize(size_t n_rows) {
CHECK_LE(new_bytes, bytes_capacity_);
n_rows_ = n_rows;
n_bytes_ = new_bytes;
- // Pad with zeroes up to the next byte in order to give CountSelected()
- // and AnySelected() the assumption that the size is an even byte
- size_t bits_in_last_byte = n_rows & 7;
- if (bits_in_last_byte > 0) {
- BitmapChangeBits(&bitmap_[0], n_rows_, 8 - bits_in_last_byte, 0);
- }
+ PadExtraBitsWithZeroes();
}
void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
diff --git a/src/kudu/common/rowblock.h b/src/kudu/common/rowblock.h
index aee55a1..660c272 100644
--- a/src/kudu/common/rowblock.h
+++ b/src/kudu/common/rowblock.h
@@ -111,15 +111,8 @@ class SelectionVector {
// Set all bits in the bitmap to 1
void SetAllTrue() {
- // Initially all rows should be selected.
memset(&bitmap_[0], 0xff, n_bytes_);
- // the last byte in the bitmap may have a few extra bits - need to
- // clear those
-
- int trailer_bits = 8 - (n_rows_ % 8);
- if (trailer_bits != 8) {
- bitmap_[n_bytes_ - 1] >>= trailer_bits;
- }
+ PadExtraBitsWithZeroes();
}
// Set all bits in the bitmap to 0
@@ -151,6 +144,24 @@ class SelectionVector {
}
private:
+
+ // Pads any non-byte-aligned bits at the end of the SelectionVector with zeroes.
+ //
+ // To improve performance, CountSelected() and AnySelected() evaluate the
+ // SelectionVector's bitmap in terms of bytes. As such, they consider all of
+ // the trailing bits, even if the bitmap's bit length is not byte-aligned and
+ // some trailing bits aren't part of the bitmap.
+ //
+ // To address this without sacrificing performance, we need to zero out all
+ // trailing bits at construction time, or after any operation that sets all
+ // bytes in bulk.
+ void PadExtraBitsWithZeroes() {
+ size_t bits_in_last_byte = n_rows_ & 7;
+ if (bits_in_last_byte > 0) {
+ BitmapChangeBits(&bitmap_[0], n_rows_, 8 - bits_in_last_byte, false);
+ }
+ }
+
// The number of allocated bytes in bitmap_
size_t bytes_capacity_;
[kudu] 02/05: [master] remove tserver_service_proto from libmaster
Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
commit f2ce76f7ab4d8e2b801daef3895cefdaa4b72909
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Tue Apr 16 22:43:36 2019 -0700
[master] remove tserver_service_proto from libmaster
Removed tserver_service_proto from target_link_libraries() of the
master library target.
There are no functional changes in this patch.
Change-Id: Iee425fc7558f48af32338941df7bd34465f313f2
Reviewed-on: http://gerrit.cloudera.org:8080/13054
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
---
src/kudu/master/CMakeLists.txt | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/kudu/master/CMakeLists.txt b/src/kudu/master/CMakeLists.txt
index aeaaf44..a8098e8 100644
--- a/src/kudu/master/CMakeLists.txt
+++ b/src/kudu/master/CMakeLists.txt
@@ -69,8 +69,7 @@ target_link_libraries(master
server_process
tablet
token_proto
- tserver
- tserver_service_proto)
+ tserver)
# Tests
SET_KUDU_TEST_LINK_LIBS(