You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/06/09 20:33:48 UTC

[1/2] incubator-kudu git commit: ts_itest-base.h: wait for bootstrapping to finish when waiting for replicas

Repository: incubator-kudu
Updated Branches:
  refs/heads/master 5df100369 -> f180051a8


ts_itest-base.h: wait for bootstrapping to finish when waiting for replicas

WaitForReplicasAndUpdateLocations() is called by many itests during test
setup. The context is almost always the same:
1. Start a mini cluster, waiting for all tservers to heartbeat.
2. Create a client.
3. Create a table using the client, waiting for table creation to finish.
4. Using WaitForReplicasAndUpdateLocations(), create a tablet-to-tserver
   multimap via direct GetTableLocations() RPCs.
5. Send RPCs directly to specific tablets using the map built in step 4.

Today's implementation of GetTableLocations() also guarantees that step #4
only completes when all replicas have finished bootstrapping. I have a patch
outstanding that removes that guarantee. Why? Because it's not terribly
useful outside of testing (a tserver can restart at any time, so clients
must always be prepared for TABLET_NOT_RUNNING responses) and because it
simplifies master state. To keep these itests working, we need to find
another way to provide the guarantee.

So here's the fix: use ExternalMiniCluster::WaitForTabletsRunning() to
ensure that all tablets on every server are actually running. But first we
must augment it to wait for a specific tablet count, otherwise it may return
despite a tserver working on a slow CreateTablet() RPC.

Change-Id: I116e0bd8ec9d7abbe830d1d0ea4e35465d990a28
Reviewed-on: http://gerrit.cloudera.org:8080/3308
Tested-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Jean-Daniel Cryans


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/5be6858d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/5be6858d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/5be6858d

Branch: refs/heads/master
Commit: 5be6858d7f5b6d01c5c0115eae107489bcdbc04d
Parents: 5df1003
Author: Adar Dembo <ad...@cloudera.com>
Authored: Fri Jun 3 16:07:06 2016 -0700
Committer: Jean-Daniel Cryans <jd...@gerrit.cloudera.org>
Committed: Wed Jun 8 20:33:37 2016 +0000

----------------------------------------------------------------------
 .../alter_table-randomized-test.cc              |  2 +-
 .../integration-tests/external_mini_cluster.cc  | 10 ++++--
 .../integration-tests/external_mini_cluster.h   | 12 +++++--
 src/kudu/integration-tests/ts_itest-base.h      | 38 ++++++++++++++++----
 src/kudu/integration-tests/ts_recovery-itest.cc |  2 +-
 5 files changed, 50 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/alter_table-randomized-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/alter_table-randomized-test.cc b/src/kudu/integration-tests/alter_table-randomized-test.cc
index b5d629a..79a65ff 100644
--- a/src/kudu/integration-tests/alter_table-randomized-test.cc
+++ b/src/kudu/integration-tests/alter_table-randomized-test.cc
@@ -86,7 +86,7 @@ class AlterTableRandomized : public KuduTest {
     cluster_->tablet_server(idx)->Shutdown();
     CHECK_OK(cluster_->tablet_server(idx)->Restart());
     CHECK_OK(cluster_->WaitForTabletsRunning(cluster_->tablet_server(idx),
-        MonoDelta::FromSeconds(60)));
+        -1, MonoDelta::FromSeconds(60)));
   }
 
  protected:

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.cc b/src/kudu/integration-tests/external_mini_cluster.cc
index 98a9828..09d3e34 100644
--- a/src/kudu/integration-tests/external_mini_cluster.cc
+++ b/src/kudu/integration-tests/external_mini_cluster.cc
@@ -333,6 +333,7 @@ void ExternalMiniCluster::AssertNoCrashes() {
 }
 
 Status ExternalMiniCluster::WaitForTabletsRunning(ExternalTabletServer* ts,
+                                                  int min_tablet_count,
                                                   const MonoDelta& timeout) {
   TabletServerServiceProxy proxy(messenger_, ts->bound_rpc_addr());
   ListTabletsRequestPB req;
@@ -348,14 +349,17 @@ Status ExternalMiniCluster::WaitForTabletsRunning(ExternalTabletServer* ts,
       return StatusFromPB(resp.error().status());
     }
 
-    int num_not_running = 0;
+    bool all_running = true;
     for (const StatusAndSchemaPB& status : resp.status_and_schema()) {
       if (status.tablet_status().state() != tablet::RUNNING) {
-        num_not_running++;
+        all_running = false;
       }
     }
 
-    if (num_not_running == 0) {
+    // We're done if:
+    // 1. All the tablets are running, and
+    // 2. We've observed as many tablets as we had expected or more.
+    if (all_running && resp.status_and_schema_size() >= min_tablet_count) {
       return Status::OK();
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/external_mini_cluster.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.h b/src/kudu/integration-tests/external_mini_cluster.h
index 4e06a98..4836549 100644
--- a/src/kudu/integration-tests/external_mini_cluster.h
+++ b/src/kudu/integration-tests/external_mini_cluster.h
@@ -231,9 +231,15 @@ class ExternalMiniCluster {
   // Runs gtest assertions that no servers have crashed.
   void AssertNoCrashes();
 
-  // Wait until all tablets on the given tablet server are in 'RUNNING'
-  // state.
-  Status WaitForTabletsRunning(ExternalTabletServer* ts, const MonoDelta& timeout);
+  // Wait until all tablets on the given tablet server are in the RUNNING
+  // state. Returns Status::TimedOut if 'timeout' elapses and at least one
+  // tablet is not yet RUNNING.
+  //
+  // If 'min_tablet_count' is not -1, will also wait for at least that many
+  // RUNNING tablets to appear before returning (potentially timing out if that
+  // number is never reached).
+  Status WaitForTabletsRunning(ExternalTabletServer* ts, int min_tablet_count,
+                               const MonoDelta& timeout);
 
   // Create a client configured to talk to this cluster.
   // Builder may contain override options for the client. The master address will

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/ts_itest-base.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/ts_itest-base.h b/src/kudu/integration-tests/ts_itest-base.h
index 453857a..ad38b3e 100644
--- a/src/kudu/integration-tests/ts_itest-base.h
+++ b/src/kudu/integration-tests/ts_itest-base.h
@@ -133,10 +133,8 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
   // Waits that all replicas for a all tablets of 'kTableId' table are online
   // and creates the tablet_replicas_ map.
   void WaitForReplicasAndUpdateLocations() {
-    int num_retries = 0;
-
     bool replicas_missing = true;
-    do {
+    for (int num_retries = 0; replicas_missing && num_retries < kMaxRetries; num_retries++) {
       std::unordered_multimap<std::string, TServerDetails*> tablet_replicas;
       GetTableLocationsRequestPB req;
       GetTableLocationsResponsePB resp;
@@ -145,7 +143,14 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
       controller.set_timeout(MonoDelta::FromSeconds(1));
       CHECK_OK(cluster_->master_proxy()->GetTableLocations(req, &resp, &controller));
       CHECK_OK(controller.status());
-      CHECK(!resp.has_error()) << "Response had an error: " << resp.error().ShortDebugString();
+      if (resp.has_error()) {
+        if (resp.error().code() == master::MasterErrorPB::TABLET_NOT_RUNNING) {
+          LOG(WARNING)<< "At least one tablet is not yet running";
+          SleepFor(MonoDelta::FromSeconds(1));
+          continue;
+        }
+        FAIL() << "Response had a fatal error: " << resp.error().ShortDebugString();
+      }
 
       for (const master::TabletLocationsPB& location : resp.tablet_locations()) {
         for (const master::TabletLocationsPB_ReplicaPB& replica : location.replicas()) {
@@ -158,7 +163,6 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
               << location.ShortDebugString();
           replicas_missing = true;
           SleepFor(MonoDelta::FromSeconds(1));
-          num_retries++;
           break;
         }
 
@@ -167,7 +171,29 @@ class TabletServerIntegrationTestBase : public TabletServerTestBase {
       if (!replicas_missing) {
         tablet_replicas_ = tablet_replicas;
       }
-    } while (replicas_missing && num_retries < kMaxRetries);
+    }
+
+    // GetTableLocations() does not guarantee that all replicas are actually
+    // running. Some may still be bootstrapping. Wait for them before
+    // returning.
+    //
+    // Just as with the above loop and its behavior once kMaxRetries is
+    // reached, the wait here is best effort only. That is, if the wait
+    // deadline expires, the resulting timeout failure is ignored.
+    for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+      ExternalTabletServer* ts = cluster_->tablet_server(i);
+      int expected_tablet_count = 0;
+      for (const auto& e : tablet_replicas_) {
+        if (ts->uuid() == e.second->uuid()) {
+          expected_tablet_count++;
+        }
+      }
+      LOG(INFO) << strings::Substitute(
+          "Waiting for $0 tablets on tserver $1 to finish bootstrapping",
+          expected_tablet_count, ts->uuid());
+      cluster_->WaitForTabletsRunning(ts, expected_tablet_count,
+                                      MonoDelta::FromSeconds(20));
+    }
   }
 
   // Returns the last committed leader of the consensus configuration. Tries to get it from master

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/5be6858d/src/kudu/integration-tests/ts_recovery-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/ts_recovery-itest.cc b/src/kudu/integration-tests/ts_recovery-itest.cc
index 2eecb19..d9b2b15 100644
--- a/src/kudu/integration-tests/ts_recovery-itest.cc
+++ b/src/kudu/integration-tests/ts_recovery-itest.cc
@@ -314,7 +314,7 @@ TEST_P(Kudu969Test, Test) {
   // Restart the TS to trigger bootstrap, and wait for it to start up.
   ts->Shutdown();
   ASSERT_OK(ts->Restart());
-  ASSERT_OK(cluster_->WaitForTabletsRunning(ts, MonoDelta::FromSeconds(90)));
+  ASSERT_OK(cluster_->WaitForTabletsRunning(ts, -1, MonoDelta::FromSeconds(90)));
 
   // Verify that the bootstrapped server matches the other replications, which
   // had no faults.


[2/2] incubator-kudu git commit: Update documentation

Posted by ad...@apache.org.
Update documentation

Added the 'unzip' package and the providers of the `lsb_release` command
required by enable_devtoolset.sh, for all 3 distros.

Change-Id: Ie3e77a778b757949c28e1cdc014c10316ffd10b3
Reviewed-on: http://gerrit.cloudera.org:8080/3351
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/f180051a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/f180051a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/f180051a

Branch: refs/heads/master
Commit: f180051a85905ccef1a6b4b48f6a253c203af72f
Parents: 5be6858
Author: cnkuyan <ce...@gmail.com>
Authored: Thu Jun 9 09:56:52 2016 +0300
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Thu Jun 9 17:24:30 2016 +0000

----------------------------------------------------------------------
 docs/installation.adoc | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/f180051a/docs/installation.adoc
----------------------------------------------------------------------
diff --git a/docs/installation.adoc b/docs/installation.adoc
index a827a5d..e93ac6c 100644
--- a/docs/installation.adoc
+++ b/docs/installation.adoc
@@ -216,7 +216,7 @@ on a version older than 7.0, the Red Hat Developer Toolset must be installed
 ----
 $ sudo yum install gcc gcc-c++ autoconf automake libtool \
   boost-static boost-devel cyrus-sasl-devel \
-  cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb git
+  cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb unzip redhat-lsb-core git
 ----
 
 . If building on RHEL or CentOS older than 7.0, install the Red Hat Developer
@@ -296,7 +296,7 @@ automated deployment scenario. It skips the steps marked *Optional* above.
 
 sudo yum -y install gcc gcc-c++ autoconf automake libtool \
   boost-static boost-devel cyrus-sasl-devel \
-  cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb git
+  cyrus-sasl-plain patch pkgconfig make rsync vim-common gdb unzip redhat-lsb-core git
 DTLS_RPM=rhscl-devtoolset-3-epel-6-x86_64.noarch.rpm
 DTLS_RPM_URL=https://www.softwarecollections.org/en/scls/rhscl/devtoolset-3/epel-6-x86_64/download/${DTLS_RPM}
 wget ${DTLS_RPM_URL} -O ${DTLS_RPM}
@@ -322,7 +322,7 @@ make -j4
 ----
 $ sudo apt-get install git autoconf automake libboost-thread-dev \
   libboost-system-dev curl gcc g++ libsasl2-dev libsasl2-modules \
-  libtool ntp patch pkg-config make rsync unzip vim-common gdb python
+  libtool ntp patch pkg-config make rsync unzip vim-common gdb python lsb-release
 ----
 
 . Optional: Install additional packages to build the documentation
@@ -386,7 +386,7 @@ the steps marked *Optional* above.
 
 sudo apt-get -y install git autoconf automake libboost-thread-dev \
   libboost-system-dev curl gcc g++ libsasl2-dev libsasl2-modules \
-  libtool ntp patch pkg-config make rsync unzip vim-common gdb python
+  libtool ntp patch pkg-config make rsync unzip vim-common gdb python lsb-release
 git clone https://github.com/apache/incubator-kudu kudu
 cd kudu
 thirdparty/build-if-necessary.sh
@@ -411,7 +411,7 @@ built alongside Kudu.
 +
 ----
 $ sudo zypper install autoconf automake curl cyrus-sasl-devel gcc gcc-c++ \
-  gdb git libtool make ntp patch pkg-config python rsync unzip vim
+  gdb git libtool make ntp patch pkg-config python rsync unzip vim lsb-release
 ----
 
 . Install Boost.
@@ -475,7 +475,7 @@ the steps marked *Optional* above.
 #!/bin/bash
 
 sudo zypper install autoconf automake curl cyrus-sasl-devel gcc gcc-c++ \
-  gdb git libtool make ntp patch pkg-config python rsync unzip vim
+  gdb git libtool make ntp patch pkg-config python rsync unzip vim lsb-release
 wget https://downloads.sourceforge.net/project/boost/boost/1.59.0/boost_1_59_0.tar.gz
 tar xzf boost_1_59_0.tar.gz
 pushd boost_1_59_0