You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2016/09/30 04:42:46 UTC
[6/6] kudu git commit: KUDU-1657: read-only FsManager::Open on active
tablet can crash
KUDU-1657: read-only FsManager::Open on active tablet can crash
This fixes a race condition when opening a read-only FsManager on a data
directory with lbm containers that are actively being appended to. See
the code comment for more details. A best-effort repro test case is
included; it typically takes about 35 seconds to repro without the fix.
This issue was identified because it was causing
alter_table-randomized-test to be significantly flaky.
Change-Id: If68b04f1f1b8cd099120a220b1245ecf8f422770
Reviewed-on: http://gerrit.cloudera.org:8080/4551
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Dan Burkert <da...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/70211f84
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/70211f84
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/70211f84
Branch: refs/heads/master
Commit: 70211f84c9888e2ed8ed9ec87c4718e1e091dad0
Parents: 74c9e67
Author: Dan Burkert <da...@cloudera.com>
Authored: Tue Sep 27 19:39:18 2016 -0700
Committer: Dan Burkert <da...@cloudera.com>
Committed: Fri Sep 30 04:41:42 2016 +0000
----------------------------------------------------------------------
src/kudu/fs/log_block_manager.cc | 12 +-
src/kudu/integration-tests/CMakeLists.txt | 1 +
.../integration-tests/open-readonly-fs-itest.cc | 146 +++++++++++++++++++
3 files changed, 158 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/70211f84/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 6866422..0046af9 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -492,7 +492,17 @@ Status LogBlockContainer::ReadContainerRecords(deque<BlockRecordPB>* records) co
local_records.pop_back();
break;
}
- CheckBlockRecord(local_records.back(), data_file_size);
+
+ const auto& record = local_records.back();
+ if (PREDICT_FALSE(data_file_size < record.offset() + record.length())) {
+ // KUDU-1657: When opening a container in read-only mode which is actively
+ // being written to by another lbm, we must reinspect the data file's size
+ // frequently in order to account for the latest appends. Inspecting the
+ // file size is expensive, so we only do it when the metadata indicates
+ // that additional data has been written to the file.
+ RETURN_NOT_OK(data_file_->Size(&data_file_size));
+ }
+ CheckBlockRecord(record, data_file_size);
}
// NOTE: 'read_status' will never be OK here.
if (PREDICT_TRUE(read_status.IsEndOfFile())) {
http://git-wip-us.apache.org/repos/asf/kudu/blob/70211f84/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt b/src/kudu/integration-tests/CMakeLists.txt
index 2201120..9e45d95 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -62,6 +62,7 @@ ADD_KUDU_TEST_DEPENDENCIES(master_migration-itest
kudu)
ADD_KUDU_TEST(master_replication-itest RESOURCE_LOCK "master-rpc-ports")
ADD_KUDU_TEST(master-stress-test RESOURCE_LOCK "master-rpc-ports")
+ADD_KUDU_TEST(open-readonly-fs-itest)
ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
ADD_KUDU_TEST(table_locations-itest)
http://git-wip-us.apache.org/repos/asf/kudu/blob/70211f84/src/kudu/integration-tests/open-readonly-fs-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/open-readonly-fs-itest.cc b/src/kudu/integration-tests/open-readonly-fs-itest.cc
new file mode 100644
index 0000000..d07f311
--- /dev/null
+++ b/src/kudu/integration-tests/open-readonly-fs-itest.cc
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <thread>
+
+#include "kudu/client/client-test-util.h"
+#include "kudu/fs/fs_manager.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
+#include "kudu/integration-tests/cluster_verifier.h"
+#include "kudu/integration-tests/external_mini_cluster.h"
+#include "kudu/util/random.h"
+#include "kudu/util/test_util.h"
+
+using kudu::client::KuduClient;
+using kudu::client::KuduColumnSchema;
+using kudu::client::KuduSchema;
+using kudu::client::KuduSchemaBuilder;
+using kudu::client::KuduSession;
+using kudu::client::KuduTable;
+using kudu::client::KuduTableCreator;
+using kudu::client::KuduWriteOperation;
+using kudu::client::sp::shared_ptr;
+
+using std::unique_ptr;
+
+namespace kudu {
+namespace itest {
+
+const auto kTimeout = MonoDelta::FromSeconds(60);
+const char kTableName[] = "test-table";
+const int kNumColumns = 50;
+
+class OpenReadonlyFsITest : public KuduTest {
+
+ void SetUp() override {
+ KuduTest::SetUp();
+
+ ExternalMiniClusterOptions opts;
+ opts.num_tablet_servers = 1;
+
+ // To repro KUDU-1657 with high probability, the container file size needs
+ // to be increasing frequently. To ensure this, we reduce the MRS flush
+ // threshold to increase flush frequency and increase the number of MM
+ // threads to encourage frequent compactions. The net effect of both of
+ // these changes: more blocks are written to disk. Turning off container
+ // file preallocation forces every append to increase the file size.
+ opts.extra_tserver_flags.push_back("--log_container_preallocate_bytes=0");
+ opts.extra_tserver_flags.push_back("--maintenance_manager_num_threads=16");
+ opts.extra_tserver_flags.push_back("--flush_threshold_mb=1");
+
+ cluster_.reset(new ExternalMiniCluster(opts));
+ ASSERT_OK(cluster_->Start());
+
+ ASSERT_OK(cluster_->CreateClient(nullptr, &client_));
+ }
+
+ protected:
+ unique_ptr<ExternalMiniCluster> cluster_;
+ shared_ptr<KuduClient> client_;
+};
+
+// Integration test that creates a tablet, then in parallel:
+// - writes as fast as possible to the tablet, causing as many flushes as possible
+// - opens read-only instances of the FS manager on the tablet's data directory
+//
+// This is a regression test for KUDU-1657. It typically takes about 35 seconds
+// to trigger that bug.
+TEST_F(OpenReadonlyFsITest, TestWriteAndVerify) {
+ if (!AllowSlowTests()) return;
+
+ KuduSchema schema;
+ KuduSchemaBuilder b;
+ b.AddColumn("key")->Type(KuduColumnSchema::INT64)->NotNull()->PrimaryKey();
+
+ for (int i = 1; i < kNumColumns; i++) {
+ b.AddColumn(strings::Substitute("value-$0", i))
+ ->Type(KuduColumnSchema::INT64)
+ ->NotNull();
+ }
+
+ ASSERT_OK(b.Build(&schema));
+ unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+ table_creator->table_name(kTableName)
+ .schema(&schema)
+ .set_range_partition_columns({ })
+ .num_replicas(1);
+
+ ASSERT_OK(table_creator->Create());
+
+ shared_ptr<KuduTable> table;
+ client_->OpenTable(kTableName, &table);
+
+ shared_ptr<KuduSession> session(client_->NewSession());
+ ASSERT_OK(session->SetFlushMode(KuduSession::AUTO_FLUSH_BACKGROUND));
+
+ Random rng(SeedRandom());
+ auto deadline = MonoTime::Now() + kTimeout;
+
+ auto t = std::thread([this, deadline] () {
+ FsManagerOpts fs_opts;
+ fs_opts.read_only = true;
+ fs_opts.wal_path = cluster_->tablet_server(0)->data_dir();
+ fs_opts.data_paths = { cluster_->tablet_server(0)->data_dir() };
+ while (MonoTime::Now() < deadline) {
+ FsManager fs(Env::Default(), fs_opts);
+ CHECK_OK(fs.Open());
+ }
+ });
+
+ int64_t count = 0;
+ while (MonoTime::Now() < deadline) {
+ // Upsert a batch of [500, 1500) random rows.
+ int batch_count = 500 + rng.Next() % 1000;
+ for (int i = 0; i < batch_count; i++) {
+ unique_ptr<KuduWriteOperation> row(table->NewUpsert());
+ for (int i = 0; i < kNumColumns; i++) {
+ ASSERT_OK(row->mutable_row()->SetInt64(i, rng.Next64()));
+ }
+ ASSERT_OK(session->Apply(row.release()));
+ }
+ ASSERT_OK(session->Flush());
+ ASSERT_EQ(0, session->CountPendingErrors());
+ count += batch_count;
+ LOG(INFO) << count << " rows inserted";
+ }
+
+ t.join();
+}
+
+} // namespace itest
+} // namespace kudu