You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by jd...@apache.org on 2017/12/22 20:08:53 UTC
kudu git commit: KUDU-2231: sparse column predicate can cause
excessive data-block reads
Repository: kudu
Updated Branches:
refs/heads/branch-1.5.x af119b68d -> 31807acf8
KUDU-2231: sparse column predicate can cause excessive data-block reads
When scanning with a sparsely-matching predicate, the CFileIterator can
repeatedly materialize non-predicate column blocks multiple times. The
result is huge amounts of CPU wasted in block decoding and poor
performance.
The root cause is that CFileIterator::SeekToOrdinal does not check
whether the currently materialized data block contains the ordinal index
being seeked to. Instead, it throws away the currently prepared blocks
(in CFileIterator::PrepareForNewSeek), and re-materializes the blocks
again.
This commit is a very targeted fix. Since I've had some time to get
familiar with this codepath in the past few days, I've found some things
that I think we could improve and simplify in follow-up commits, which
I've filed as KUDU-2243.
Change-Id: I8eb3be4a809f882ccd80c48612099b2071306ff7
Reviewed-on: http://gerrit.cloudera.org:8080/8869
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <da...@apache.org>
(cherry picked from commit 36ecb300901daf4006fae0c9be5657f2f6127233)
Reviewed-on: http://gerrit.cloudera.org:8080/8903
Reviewed-by: Jean-Daniel Cryans <jd...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/31807acf
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/31807acf
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/31807acf
Branch: refs/heads/branch-1.5.x
Commit: 31807acf80e8b9d92761592b04fbeaa463d7a4fc
Parents: af119b6
Author: Dan Burkert <da...@apache.org>
Authored: Fri Dec 15 17:03:25 2017 -0800
Committer: Jean-Daniel Cryans <jd...@apache.org>
Committed: Fri Dec 22 20:08:31 2017 +0000
----------------------------------------------------------------------
src/kudu/cfile/cfile_reader.cc | 62 ++++++++++++++++---------
src/kudu/tablet/tablet-pushdown-test.cc | 67 +++++++++++++++++++++++++++-
2 files changed, 106 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/31807acf/src/kudu/cfile/cfile_reader.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile_reader.cc b/src/kudu/cfile/cfile_reader.cc
index 6311b75..e1346d3 100644
--- a/src/kudu/cfile/cfile_reader.cc
+++ b/src/kudu/cfile/cfile_reader.cc
@@ -665,37 +665,57 @@ CFileIterator::~CFileIterator() {
}
Status CFileIterator::SeekToOrdinal(rowid_t ord_idx) {
- RETURN_NOT_OK(PrepareForNewSeek());
- if (PREDICT_FALSE(posidx_iter_ == nullptr)) {
- return Status::NotSupported("no positional index in file");
- }
+ // Check to see if we already have the required block prepared. Typically
+ // (when seeking forward during a scan), only the final block might be
+ // suitable, since all previous blocks will have been scanned in the prior
+ // batch. Only reusing the final block also ensures posidx_iter_ is already
+ // seeked to the correct block.
+ if (!prepared_blocks_.empty() &&
+ prepared_blocks_.back()->last_row_idx() >= ord_idx &&
+ prepared_blocks_.back()->first_row_idx() <= ord_idx) {
+ PreparedBlock* b = prepared_blocks_.back();
+ prepared_blocks_.pop_back();
+ for (PreparedBlock* pb : prepared_blocks_) {
+ prepared_block_pool_.Destroy(pb);
+ }
+ prepared_blocks_.clear();
- tmp_buf_.clear();
- KeyEncoderTraits<UINT32, faststring>::Encode(ord_idx, &tmp_buf_);
- RETURN_NOT_OK(posidx_iter_->SeekAtOrBefore(Slice(tmp_buf_)));
+ prepared_blocks_.push_back(b);
+ } else {
+ // Otherwise, prepare a new cblock to scan.
+ RETURN_NOT_OK(PrepareForNewSeek());
+ if (PREDICT_FALSE(posidx_iter_ == nullptr)) {
+ return Status::NotSupported("no positional index in file");
+ }
- // TODO: fast seek within block (without reseeking index)
- pblock_pool_scoped_ptr b = prepared_block_pool_.make_scoped_ptr(
- prepared_block_pool_.Construct());
- RETURN_NOT_OK(ReadCurrentDataBlock(*posidx_iter_, b.get()));
-
- // If the data block doesn't actually contain the data
- // we're looking for, then we're probably in the last
- // block in the file.
- // TODO: could assert that each of the index layers is
- // at its last entry (ie HasNext() is false for each)
- if (PREDICT_FALSE(ord_idx > b->last_row_idx())) {
- return Status::NotFound("trying to seek past highest ordinal in file");
+ tmp_buf_.clear();
+ KeyEncoderTraits<UINT32, faststring>::Encode(ord_idx, &tmp_buf_);
+ RETURN_NOT_OK(posidx_iter_->SeekAtOrBefore(Slice(tmp_buf_)));
+
+ pblock_pool_scoped_ptr b = prepared_block_pool_.make_scoped_ptr(
+ prepared_block_pool_.Construct());
+ RETURN_NOT_OK(ReadCurrentDataBlock(*posidx_iter_, b.get()));
+
+ // If the data block doesn't actually contain the data
+ // we're looking for, then we're probably in the last
+ // block in the file.
+ // TODO(unknown): could assert that each of the index layers is
+ // at its last entry (ie HasNext() is false for each)
+ if (PREDICT_FALSE(ord_idx > b->last_row_idx())) {
+ return Status::NotFound("trying to seek past highest ordinal in file");
+ }
+ prepared_blocks_.push_back(b.release());
}
+ PreparedBlock* b = prepared_blocks_.back();
+
// Seek data block to correct index
DCHECK(ord_idx >= b->first_row_idx() &&
ord_idx <= b->last_row_idx())
<< "got wrong data block. looking for ord_idx=" << ord_idx
<< " but got dblk " << b->ToString();
- SeekToPositionInBlock(b.get(), ord_idx - b->first_row_idx());
+ SeekToPositionInBlock(b, ord_idx - b->first_row_idx());
- prepared_blocks_.push_back(b.release());
last_prepare_idx_ = ord_idx;
last_prepare_count_ = 0;
seeked_ = posidx_iter_.get();
http://git-wip-us.apache.org/repos/asf/kudu/blob/31807acf/src/kudu/tablet/tablet-pushdown-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet-pushdown-test.cc b/src/kudu/tablet/tablet-pushdown-test.cc
index cabc2a3..0389928 100644
--- a/src/kudu/tablet/tablet-pushdown-test.cc
+++ b/src/kudu/tablet/tablet-pushdown-test.cc
@@ -35,7 +35,6 @@
#include "kudu/common/scan_spec.h"
#include "kudu/common/schema.h"
#include "kudu/gutil/gscoped_ptr.h"
-#include "kudu/gutil/port.h"
#include "kudu/gutil/stringprintf.h"
#include "kudu/tablet/local_tablet_writer.h"
#include "kudu/tablet/tablet-test-util.h"
@@ -68,7 +67,7 @@ class TabletPushdownTest : public KuduTabletTest,
ColumnSchema("string_val", STRING) }, 1)) {
}
- virtual void SetUp() OVERRIDE {
+ void SetUp() override {
KuduTabletTest::SetUp();
FillTestTablet();
@@ -226,5 +225,69 @@ INSTANTIATE_TEST_CASE_P(AllMemory, TabletPushdownTest, ::testing::Values(ALL_IN_
INSTANTIATE_TEST_CASE_P(SplitMemoryDisk, TabletPushdownTest, ::testing::Values(SPLIT_MEMORY_DISK));
INSTANTIATE_TEST_CASE_P(AllDisk, TabletPushdownTest, ::testing::Values(ALL_ON_DISK));
+class TabletSparsePushdownTest : public KuduTabletTest {
+ public:
+ TabletSparsePushdownTest()
+ : KuduTabletTest(Schema({ ColumnSchema("key", INT32),
+ ColumnSchema("val", INT32, true) },
+ 1)) {
+ }
+
+ void SetUp() override {
+ KuduTabletTest::SetUp();
+
+ RowBuilder rb(client_schema_);
+
+ LocalTabletWriter writer(tablet().get(), &client_schema_);
+ KuduPartialRow row(&client_schema_);
+
+ // FLAGS_scanner_batch_size_rows * 2
+ int kWidth = 100 * 2;
+ for (int i = 0; i < kWidth * 2; i++) {
+ CHECK_OK(row.SetInt32(0, i));
+ if (i % 3 == 0) {
+ CHECK_OK(row.SetNull(1));
+ } else {
+ CHECK_OK(row.SetInt32(1, i % kWidth));
+ }
+ ASSERT_OK_FAST(writer.Insert(row));
+ }
+ ASSERT_OK(tablet()->Flush());
+ }
+};
+
+// The is a regression test for KUDU-2231, which fixed a CFileReader bug causing
+// cblocks to be repeatedly materialized when a scan had predicates on other
+// columns which caused sections of a column to be skipped. The setup for this
+// test creates a dataset and predicate which only match every-other batch of
+// 100 rows of the 'val' column.
+TEST_F(TabletSparsePushdownTest, Kudu2231) {
+ ScanSpec spec;
+ int32_t value = 50;
+ spec.AddPredicate(ColumnPredicate::Equality(schema_.column(1), &value));
+
+ gscoped_ptr<RowwiseIterator> iter;
+ ASSERT_OK(tablet()->NewRowIterator(client_schema_, &iter));
+ ASSERT_OK(iter->Init(&spec));
+ ASSERT_TRUE(spec.predicates().empty()) << "Should have accepted all predicates";
+
+ vector<string> results;
+ ASSERT_OK(IterateToStringList(iter.get(), &results));
+
+ EXPECT_EQ(results, vector<string>({
+ "(int32 key=50, int32 val=50)",
+ "(int32 key=250, int32 val=50)",
+ }));
+
+ vector<IteratorStats> stats;
+ iter->GetIteratorStats(&stats);
+
+ EXPECT_EQ(1, stats[0].data_blocks_read_from_disk);
+ EXPECT_EQ(1, stats[1].data_blocks_read_from_disk);
+
+ EXPECT_EQ(400, stats[0].cells_read_from_disk);
+ EXPECT_EQ(400, stats[1].cells_read_from_disk);
+}
+
} // namespace tablet
} // namespace kudu