You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by da...@apache.org on 2016/09/30 04:42:46 UTC

[6/6] kudu git commit: KUDU-1657: read-only FsManager::Open on active tablet can crash

KUDU-1657: read-only FsManager::Open on active tablet can crash

This fixes a race condition when opening a read-only FsManager on a data
directory with lbm containers that are actively being appended to. See
the code comment for more details. A best-effort repro test case is
included; it typically takes about 35 seconds to repro without the fix.
This issue was identified because it was causing
alter_table-randomized-test to be significantly flaky.

Change-Id: If68b04f1f1b8cd099120a220b1245ecf8f422770
Reviewed-on: http://gerrit.cloudera.org:8080/4551
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Dan Burkert <da...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/70211f84
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/70211f84
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/70211f84

Branch: refs/heads/master
Commit: 70211f84c9888e2ed8ed9ec87c4718e1e091dad0
Parents: 74c9e67
Author: Dan Burkert <da...@cloudera.com>
Authored: Tue Sep 27 19:39:18 2016 -0700
Committer: Dan Burkert <da...@cloudera.com>
Committed: Fri Sep 30 04:41:42 2016 +0000

----------------------------------------------------------------------
 src/kudu/fs/log_block_manager.cc                |  12 +-
 src/kudu/integration-tests/CMakeLists.txt       |   1 +
 .../integration-tests/open-readonly-fs-itest.cc | 146 +++++++++++++++++++
 3 files changed, 158 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/70211f84/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 6866422..0046af9 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -492,7 +492,17 @@ Status LogBlockContainer::ReadContainerRecords(deque<BlockRecordPB>* records) co
       local_records.pop_back();
       break;
     }
-    CheckBlockRecord(local_records.back(), data_file_size);
+
+    const auto& record = local_records.back();
+    if (PREDICT_FALSE(data_file_size < record.offset() + record.length())) {
+      // KUDU-1657: When opening a container in read-only mode which is actively
+      // being written to by another lbm, we must reinspect the data file's size
+      // frequently in order to account for the latest appends. Inspecting the
+      // file size is expensive, so we only do it when the metadata indicates
+      // that additional data has been written to the file.
+      RETURN_NOT_OK(data_file_->Size(&data_file_size));
+    }
+    CheckBlockRecord(record, data_file_size);
   }
   // NOTE: 'read_status' will never be OK here.
   if (PREDICT_TRUE(read_status.IsEndOfFile())) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/70211f84/src/kudu/integration-tests/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/CMakeLists.txt b/src/kudu/integration-tests/CMakeLists.txt
index 2201120..9e45d95 100644
--- a/src/kudu/integration-tests/CMakeLists.txt
+++ b/src/kudu/integration-tests/CMakeLists.txt
@@ -62,6 +62,7 @@ ADD_KUDU_TEST_DEPENDENCIES(master_migration-itest
   kudu)
 ADD_KUDU_TEST(master_replication-itest RESOURCE_LOCK "master-rpc-ports")
 ADD_KUDU_TEST(master-stress-test RESOURCE_LOCK "master-rpc-ports")
+ADD_KUDU_TEST(open-readonly-fs-itest)
 ADD_KUDU_TEST(raft_consensus-itest RUN_SERIAL true)
 ADD_KUDU_TEST(registration-test RESOURCE_LOCK "master-web-port")
 ADD_KUDU_TEST(table_locations-itest)

http://git-wip-us.apache.org/repos/asf/kudu/blob/70211f84/src/kudu/integration-tests/open-readonly-fs-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/open-readonly-fs-itest.cc b/src/kudu/integration-tests/open-readonly-fs-itest.cc
new file mode 100644
index 0000000..d07f311
--- /dev/null
+++ b/src/kudu/integration-tests/open-readonly-fs-itest.cc
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <memory>
+#include <thread>
+
+#include "kudu/client/client-test-util.h"
+#include "kudu/fs/fs_manager.h"
+#include "kudu/integration-tests/cluster_itest_util.h"
+#include "kudu/integration-tests/cluster_verifier.h"
+#include "kudu/integration-tests/external_mini_cluster.h"
+#include "kudu/util/random.h"
+#include "kudu/util/test_util.h"
+
+using kudu::client::KuduClient;
+using kudu::client::KuduColumnSchema;
+using kudu::client::KuduSchema;
+using kudu::client::KuduSchemaBuilder;
+using kudu::client::KuduSession;
+using kudu::client::KuduTable;
+using kudu::client::KuduTableCreator;
+using kudu::client::KuduWriteOperation;
+using kudu::client::sp::shared_ptr;
+
+using std::unique_ptr;
+
+namespace kudu {
+namespace itest {
+
+const auto kTimeout = MonoDelta::FromSeconds(60);
+const char kTableName[] = "test-table";
+const int kNumColumns = 50;
+
+class OpenReadonlyFsITest : public KuduTest {
+
+  void SetUp() override {
+    KuduTest::SetUp();
+
+    ExternalMiniClusterOptions opts;
+    opts.num_tablet_servers = 1;
+
+    // To repro KUDU-1657 with high probability, the container file size needs
+    // to be increasing frequently. To ensure this, we reduce the MRS flush
+    // threshold to increase flush frequency and increase the number of MM
+    // threads to encourage frequent compactions. The net effect of both of
+    // these changes: more blocks are written to disk. Turning off container
+    // file preallocation forces every append to increase the file size.
+    opts.extra_tserver_flags.push_back("--log_container_preallocate_bytes=0");
+    opts.extra_tserver_flags.push_back("--maintenance_manager_num_threads=16");
+    opts.extra_tserver_flags.push_back("--flush_threshold_mb=1");
+
+    cluster_.reset(new ExternalMiniCluster(opts));
+    ASSERT_OK(cluster_->Start());
+
+    ASSERT_OK(cluster_->CreateClient(nullptr, &client_));
+  }
+
+ protected:
+  unique_ptr<ExternalMiniCluster> cluster_;
+  shared_ptr<KuduClient> client_;
+};
+
+// Integration test that creates a tablet, then in parallel:
+//  - writes as fast as possible to the tablet, causing as many flushes as possible
+//  - opens read-only instances of the FS manager on the tablet's data directory
+//
+// This is a regression test for KUDU-1657. It typically takes about 35 seconds
+// to trigger that bug.
+TEST_F(OpenReadonlyFsITest, TestWriteAndVerify) {
+  if (!AllowSlowTests()) return;
+
+  KuduSchema schema;
+  KuduSchemaBuilder b;
+  b.AddColumn("key")->Type(KuduColumnSchema::INT64)->NotNull()->PrimaryKey();
+
+  for (int i = 1; i < kNumColumns; i++) {
+    b.AddColumn(strings::Substitute("value-$0", i))
+                       ->Type(KuduColumnSchema::INT64)
+                       ->NotNull();
+  }
+
+  ASSERT_OK(b.Build(&schema));
+  unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+  table_creator->table_name(kTableName)
+                .schema(&schema)
+                .set_range_partition_columns({ })
+                .num_replicas(1);
+
+  ASSERT_OK(table_creator->Create());
+
+  shared_ptr<KuduTable> table;
+  client_->OpenTable(kTableName, &table);
+
+  shared_ptr<KuduSession> session(client_->NewSession());
+  ASSERT_OK(session->SetFlushMode(KuduSession::AUTO_FLUSH_BACKGROUND));
+
+  Random rng(SeedRandom());
+  auto deadline = MonoTime::Now() + kTimeout;
+
+  auto t = std::thread([this, deadline] () {
+      FsManagerOpts fs_opts;
+      fs_opts.read_only = true;
+      fs_opts.wal_path = cluster_->tablet_server(0)->data_dir();
+      fs_opts.data_paths = { cluster_->tablet_server(0)->data_dir() };
+      while (MonoTime::Now() < deadline) {
+        FsManager fs(Env::Default(), fs_opts);
+        CHECK_OK(fs.Open());
+      }
+  });
+
+  int64_t count = 0;
+  while (MonoTime::Now() < deadline) {
+    // Upsert a batch of [500, 1500) random rows.
+    int batch_count = 500 + rng.Next() % 1000;
+    for (int i = 0; i < batch_count; i++) {
+      unique_ptr<KuduWriteOperation> row(table->NewUpsert());
+      for (int i = 0; i < kNumColumns; i++) {
+        ASSERT_OK(row->mutable_row()->SetInt64(i, rng.Next64()));
+      }
+      ASSERT_OK(session->Apply(row.release()));
+    }
+    ASSERT_OK(session->Flush());
+    ASSERT_EQ(0, session->CountPendingErrors());
+    count += batch_count;
+    LOG(INFO) << count << " rows inserted";
+  }
+
+  t.join();
+}
+
+} // namespace itest
+} // namespace kudu