You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/06/09 20:33:48 UTC
[1/2] incubator-kudu git commit: ts_itest-base.h: wait for
bootstrapping to finish when waiting for replicas
Repository: incubator-kudu
Updated Branches:
refs/heads/master 5df100369 -> f180051a8
ts_itest-base.h: wait for bootstrapping to finish when waiting for replicas
WaitForReplicasAndUpdateLocations() is called by many itests during test
setup. The context is almost always the same:
1. Start a mini cluster, waiting for all tservers to heartbeat.
2. Create a client.
3. Create a table using the client, waiting for table creation to finish.
4. Using WaitForReplicasAndUpdateLocations(), create a tablet-to-tserver
multimap via direct GetTableLocations() RPCs.
5. Send RPCs directly to specific tablets using the map built in step 4.
Today's implementation of GetTableLocations() also guarantees that step #4
only completes when all replicas have finished bootstrapping. I have a patch
outstanding that removes that guarantee. Why? Because it's not terribly
useful outside of testing (a tserver can restart at any time, so clients
must always be prepared for TABLET_NOT_RUNNING responses) and because it
simplifies master state. To keep these itests working, we need to find
another way to provide the guarantee.
So here's the fix: use ExternalMiniCluster::WaitForTabletsRunning() to
ensure that all tablets on every server are actually running. But first we
must augment it to wait for a specific tablet count, otherwise it may return
despite a tserver working on a slow CreateTablet() RPC.
Change-Id: I116e0bd8ec9d7abbe830d1d0ea4e35465d990a28
Reviewed-on: http://gerrit.cloudera.org:8080/3308
Tested-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Jean-Daniel Cryans
Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/5be6858d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/5be6858d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/5be6858d
Branch: refs/heads/master
Commit: 5be6858d7f5b6d01c5c0115eae107489bcdbc04d
Parents: 5df1003
Author: Adar Dembo <ad...@cloudera.com>
Authored: Fri Jun 3 16:07:06 2016 -0700
Committer: Jean-Daniel Cryans <jd...@gerrit.cloudera.org>
Committed: Wed Jun 8 20:33:37 2016 +0000
----------------------------------------------------------------------
.../alter_table-randomized-test.cc | 2 +-
.../integration-tests/external_mini_cluster.cc | 10 ++++--
.../integration-tests/external_mini_cluster.h | 12 +++++--
src/kudu/integration-tests/ts_itest-base.h | 38 ++++++++++++++++----
src/kudu/integration-tests/ts_recovery-itest.cc | 2 +-
5 files changed, 50 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/alter_table-randomized-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/alter_table-randomized-test.cc b/src/kudu/integration-tests/alter_table-randomized-test.cc
index b5d629a..79a65ff 100644
--- a/src/kudu/integration-tests/alter_table-randomized-test.cc
+++ b/src/kudu/integration-tests/alter_table-randomized-test.cc
@@ -86,7 +86,7 @@ class AlterTableRandomized : public KuduTest {
cluster_->tablet_server(idx)->Shutdown();
CHECK_OK(cluster_->tablet_server(idx)->Restart());
CHECK_OK(cluster_->WaitForTabletsRunning(cluster_->tablet_server(idx),
- MonoDelta::FromSeconds(60)));
+ -1, MonoDelta::FromSeconds(60)));
}
protected:
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.cc b/src/kudu/integration-tests/external_mini_cluster.cc
index 98a9828..09d3e34 100644
--- a/src/kudu/integration-tests/external_mini_cluster.cc
+++ b/src/kudu/integration-tests/external_mini_cluster.cc
@@ -333,6 +333,7 @@ void ExternalMiniCluster::AssertNoCrashes() {
}
Status ExternalMiniCluster::WaitForTabletsRunning(ExternalTabletServer* ts,
+ int min_tablet_count,
const MonoDelta& timeout) {
TabletServerServiceProxy proxy(messenger_, ts->bound_rpc_addr());
ListTabletsRequestPB req;
@@ -348,14 +349,17 @@ Status ExternalMiniCluster::WaitForTabletsRunning(ExternalTabletServer* ts,
return StatusFromPB(resp.error().status());
}
- int num_not_running = 0;
+ bool all_running = true;
for (const StatusAndSchemaPB& status : resp.status_and_schema()) {
if (status.tablet_status().state() != tablet::RUNNING) {
- num_not_running++;
+ all_running = false;
}
}
- if (num_not_running == 0) {
+ // We're done if:
+ // 1. All the tablets are running, and
+ // 2. We've observed as many tablets as we had expected or more.
+ if (all_running && resp.status_and_schema_size() >= min_tablet_count) {
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.h b/src/kudu/integration-tests/external_mini_cluster.h
index 4e06a98..4836549 100644
--- a/src/kudu/integration-tests/external_mini_cluster.h
+++ b/src/kudu/integration-tests/external_mini_cluster.h
@@ -231,9 +231,15 @@ class ExternalMiniCluster {
// Runs gtest assertions that no servers have crashed.
void AssertNoCrashes();
- // Wait until all tablets on the given tablet server are in 'RUNNING'
- // state.
- Status WaitForTabletsRunning(ExternalTabletServer* ts, const MonoDelta& timeout);
+ // Wait until all tablets on the given tablet server are in the RUNNING
+ // state. Returns Status::TimedOut if 'timeout' elapses and at least one
+ // tablet is not yet RUNNING.
+ //
+ // If 'min_tablet_count' is not -1, will also wait for at least that many
+ // RUNNING tablets to appear before returning (potentially timing out if that
+ // number is never reached).
+ Status WaitForTabletsRunning(ExternalTabletServer* ts, int min_tablet_count,
+ const MonoDelta& timeout);
// Create a client configured to talk to this cluster.
// Builder may contain override options for the client. The master address will
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/ts_itest-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/ts_itest-base.h b/src/kudu/integration-tests/ts_itest-base.h
index 453857a..ad38b3e 100644
--- a/src/kudu/integration-tests/ts_itest-base.h
+++ b/src/kudu/integration-tests/ts_itest-base.h
@@ -133,10 +133,8 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
// Waits that all replicas for a all tablets of 'kTableId' table are online
// and creates the tablet_replicas_ map.
void WaitForReplicasAndUpdateLocations() {
- int num_retries = 0;
-
bool replicas_missing = true;
- do {
+ for (int num_retries = 0; replicas_missing && num_retries < kMaxRetries; num_retries++) {
std::unordered_multimap<std::string, TServerDetails*> tablet_replicas;
GetTableLocationsRequestPB req;
GetTableLocationsResponsePB resp;
@@ -145,7 +143,14 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
controller.set_timeout(MonoDelta::FromSeconds(1));
CHECK_OK(cluster_->master_proxy()->GetTableLocations(req, &resp, &controller));
CHECK_OK(controller.status());
- CHECK(!resp.has_error()) << "Response had an error: " << resp.error().ShortDebugString();
+ if (resp.has_error()) {
+ if (resp.error().code() == master::MasterErrorPB::TABLET_NOT_RUNNING) {
+ LOG(WARNING)<< "At least one tablet is not yet running";
+ SleepFor(MonoDelta::FromSeconds(1));
+ continue;
+ }
+ FAIL() << "Response had a fatal error: " << resp.error().ShortDebugString();
+ }
for (const master::TabletLocationsPB& location : resp.tablet_locations()) {
for (const master::TabletLocationsPB_ReplicaPB& replica : location.replicas()) {
@@ -158,7 +163,6 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
<< location.ShortDebugString();
replicas_missing = true;
SleepFor(MonoDelta::FromSeconds(1));
- num_retries++;
break;
}
@@ -167,7 +171,29 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
if (!replicas_missing) {
tablet_replicas_ = tablet_replicas;
}
- } while (replicas_missing && num_retries < kMaxRetries);
+ }
+
+ // GetTableLocations() does not guarantee that all replicas are actually
+ // running. Some may still be bootstrapping. Wait for them before
+ // returning.
+ //
+ // Just as with the above loop and its behavior once kMaxRetries is
+ // reached, the wait here is best effort only. That is, if the wait
+ // deadline expires, the resulting timeout failure is ignored.
+ for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+ ExternalTabletServer* ts = cluster_->tablet_server(i);
+ int expected_tablet_count = 0;
+ for (const auto& e : tablet_replicas_) {
+ if (ts->uuid() == e.second->uuid()) {
+ expected_tablet_count++;
+ }
+ }
+ LOG(INFO) << strings::Substitute(
+ "Waiting for $0 tablets on tserver $1 to finish bootstrapping",
+ expected_tablet_count, ts->uuid());
+ cluster_->WaitForTabletsRunning(ts, expected_tablet_count,
+ MonoDelta::FromSeconds(20));
+ }
}
// Returns the last committed leader of the consensus configuration. Tries to get it from master
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/ts_recovery-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/ts_recovery-itest.cc b/src/kudu/integration-tests/ts_recovery-itest.cc
index 2eecb19..d9b2b15 100644
--- a/src/kudu/integration-tests/ts_recovery-itest.cc
+++ b/src/kudu/integration-tests/ts_recovery-itest.cc
@@ -314,7 +314,7 @@ TEST_P(Kudu969Test, Test) {
// Restart the TS to trigger bootstrap, and wait for it to start up.
ts->Shutdown();
ASSERT_OK(ts->Restart());
- ASSERT_OK(cluster_->WaitForTabletsRunning(ts, MonoDelta::FromSeconds(90)));
+ ASSERT_OK(cluster_->WaitForTabletsRunning(ts, -1, MonoDelta::FromSeconds(90)));
// Verify that the bootstrapped server matches the other replications, which
// had no faults.
[2/2] incubator-kudu git commit: Update documentation
Posted by ad...@apache.org.
Update documentation
Added the 'unzip' package and the providers of the `lsb_release` command
required by enable_devtoolset.sh, for all 3 distros.
Change-Id: Ie3e77a778b757949c28e1cdc014c10316ffd10b3
Reviewed-on: http://gerrit.cloudera.org:8080/3351
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/f180051a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/f180051a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/f180051a
Branch: refs/heads/master
Commit: f180051a85905ccef1a6b4b48f6a253c203af72f
Parents: 5be6858
Author: cnkuyan <ce...@gmail.com>
Authored: Thu Jun 9 09:56:52 2016 +0300
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Thu Jun 9 17:24:30 2016 +0000
----------------------------------------------------------------------
docs/installation.adoc | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/f180051a/docs/installation.adoc
----------------------------------------------------------------------
diff --git a/docs/installation.adoc b/docs/installation.adoc
index a827a5d..e93ac6c 100644
--- a/docs/installation.adoc
+++ b/docs/installation.adoc
@@ -216,7 +216,7 @@ on a version older than 7.0, the Red Hat Developer Toolset must be installed
----
$ sudo yum install gcc gcc-c++ autoconf automake libtool \
boost-static boost-devel cyrus-sasl-devel \
- cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb git
+ cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb unzip redhat-lsb-core git
----
. If building on RHEL or CentOS older than 7.0, install the Red Hat Developer
@@ -296,7 +296,7 @@ automated deployment scenario. It skips the steps marked *Optional* above.
sudo yum -y install gcc gcc-c++ autoconf automake libtool \
boost-static boost-devel cyrus-sasl-devel \
- cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb git
+ cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb unzip redhat-lsb-core git
DTLS_RPM=rhscl-devtoolset-3-epel-6-x86_64.noarch.rpm
DTLS_RPM_URL=https://www.softwarecollections.org/en/scls/rhscl/devtoolset-3/epel-6-x86_64/download/${DTLS_RPM}
wget ${DTLS_RPM_URL} -O ${DTLS_RPM}
@@ -322,7 +322,7 @@ make -j4
----
$ sudo apt-get install git autoconf automake libboost-thread-dev \
libboost-system-dev curl gcc g++ libsasl2-dev libsasl2-modules \
- libtool ntp patch pkg-config make rsync unzip vim-common gdb python
+ libtool ntp patch pkg-config make rsync unzip vim-common gdb python lsb-release
----
. Optional: Install additional packages to build the documentation
@@ -386,7 +386,7 @@ the steps marked *Optional* above.
sudo apt-get -y install git autoconf automake libboost-thread-dev \
libboost-system-dev curl gcc g++ libsasl2-dev libsasl2-modules \
- libtool ntp patch pkg-config make rsync unzip vim-common gdb python
+ libtool ntp patch pkg-config make rsync unzip vim-common gdb python lsb-release
git clone https://github.com/apache/incubator-kudu kudu
cd kudu
thirdparty/build-if-necessary.sh
@@ -411,7 +411,7 @@ built alongside Kudu.
+
----
$ sudo zypper install autoconf automake curl cyrus-sasl-devel gcc gcc-c++ \
- gdb git libtool make ntp patch pkg-config python rsync unzip vim
+ gdb git libtool make ntp patch pkg-config python rsync unzip vim lsb-release
----
. Install Boost.
@@ -475,7 +475,7 @@ the steps marked *Optional* above.
#!/bin/bash
sudo zypper install autoconf automake curl cyrus-sasl-devel gcc gcc-c++ \
- gdb git libtool make ntp patch pkg-config python rsync unzip vim
+ gdb git libtool make ntp patch pkg-config python rsync unzip vim lsb-release
wget https://downloads.sourceforge.net/project/boost/boost/1.59.0/boost_1_59_0.tar.gz
tar xzf boost_1_59_0.tar.gz
pushd boost_1_59_0