You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by zh...@apache.org on 2017/10/17 07:33:03 UTC
[bookkeeper] branch master updated: BOOKKEEPER-1062: Fixed the race
in AuditorLedgerCheckerTest
This is an automated email from the ASF dual-hosted git repository.
zhaijia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 40a7f1d BOOKKEEPER-1062: Fixed the race in AuditorLedgerCheckerTest
40a7f1d is described below
commit 40a7f1de440e6aec0b6640fc34a0ec7460c29ab7
Author: Robert Evans <ev...@yahoo-inc.com>
AuthorDate: Tue Oct 17 15:32:47 2017 +0800
BOOKKEEPER-1062: Fixed the race in AuditorLedgerCheckerTest
Descriptions of the changes in this PR:
This fixes the race condition in the auditor between read only bookies and available bookies. But because I am not sure of the performance impacts to modifying getReadOnlyBookies, I have opted to add in a new getROBookies. But I am happy to change it. This pull request is mostly to show my work and see what others think.
Author: Robert Evans <ev...@yahoo-inc.com>
Reviewers: Enrico Olivelli <eo...@gmail.com>, Jia Zhai <None>, Sijie Guo <si...@apache.org>
This closes #556 from revans2/BOOKKEEPER-1062
---
.../org/apache/bookkeeper/bookie/BookieShell.java | 2 +-
.../apache/bookkeeper/client/BookKeeperAdmin.java | 18 ++++++++++++++----
.../apache/bookkeeper/client/BookieInfoReader.java | 2 +-
.../apache/bookkeeper/client/BookieWatcher.java | 17 ++++++++++++++++-
.../apache/bookkeeper/http/ListBookiesService.java | 3 +--
.../org/apache/bookkeeper/proto/BookieServer.java | 12 ++++++++++++
.../org/apache/bookkeeper/replication/Auditor.java | 2 +-
.../bookkeeper/replication/AuditorElector.java | 5 +++++
.../bookkeeper/replication/ReplicationWorker.java | 4 ++--
.../replication/AuditorLedgerCheckerTest.java | 22 +++++++++++++++++++++-
10 files changed, 74 insertions(+), 13 deletions(-)
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
index f4e943d..1c08eb2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
@@ -1125,7 +1125,7 @@ public class BookieShell implements Tool {
bookies.addAll(availableBookies);
} else if (cmdLine.hasOption("ro")) {
Collection<BookieSocketAddress> roBookies = bka
- .getReadOnlyBookies();
+ .getReadOnlyBookiesAsync();
bookies.addAll(roBookies);
}
for (BookieSocketAddress b : bookies) {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
index e0bea68..a0d2fe2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
@@ -231,12 +231,22 @@ public class BookKeeperAdmin implements AutoCloseable {
}
/**
- * Get a list of readonly bookies
+ * Get a list of readonly bookies synchronously.
*
* @return a collection of bookie addresses
+ * @throws BKException if there are issues trying to read the list.
*/
- public Collection<BookieSocketAddress> getReadOnlyBookies() {
- return bkc.bookieWatcher.getReadOnlyBookies();
+ public Collection<BookieSocketAddress> getReadOnlyBookiesSync() throws BKException {
+ return bkc.bookieWatcher.getReadOnlyBookiesSync();
+ }
+
+ /**
+ * Get a list of readonly bookies asynchronously (may be slightly out of date).
+ *
+ * @return a collection of bookie addresses
+ */
+ public Collection<BookieSocketAddress> getReadOnlyBookiesAsync() {
+ return bkc.bookieWatcher.getReadOnlyBookiesAsync();
}
/**
@@ -1186,7 +1196,7 @@ public class BookKeeperAdmin implements AutoCloseable {
public void decommissionBookie(BookieSocketAddress bookieAddress)
throws CompatibilityException, UnavailableException, KeeperException, InterruptedException, IOException,
BKAuditException, TimeoutException, BKException {
- if (getAvailableBookies().contains(bookieAddress) || getReadOnlyBookies().contains(bookieAddress)) {
+ if (getAvailableBookies().contains(bookieAddress) || getReadOnlyBookiesAsync().contains(bookieAddress)) {
LOG.error("Bookie: {} is not shutdown yet", bookieAddress);
throw BKException.create(BKException.Code.IllegalOpException);
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java
index 399af6b..70fe2bd 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java
@@ -402,7 +402,7 @@ public class BookieInfoReader {
Collection<BookieSocketAddress> bookies;
bookies = bk.bookieWatcher.getBookies();
- bookies.addAll(bk.bookieWatcher.getReadOnlyBookies());
+ bookies.addAll(bk.bookieWatcher.getReadOnlyBookiesAsync());
totalSent.set(bookies.size());
for (BookieSocketAddress b : bookies) {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java
index 74fe089..b11271f 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java
@@ -133,6 +133,21 @@ class BookieWatcher implements Watcher, ChildrenCallback {
readOnlyBookieWatcher.notifyBookiesChanged(listener);
}
+ public Collection<BookieSocketAddress> getReadOnlyBookiesSync() throws BKException {
+ try {
+ String znode = this.bookieRegistrationPath + "/" + BookKeeperConstants.READONLY;
+ List<String> children = bk.getZkHandle().getChildren(znode, false);
+ return convertToBookieAddresses(children);
+ } catch (KeeperException ke) {
+ logger.error("Failed to get read only bookie list : ", ke);
+ throw new BKException.ZKException();
+ } catch (InterruptedException ie) {
+ Thread.currentThread().interrupt();
+ logger.error("Interrupted reading read only bookie list", ie);
+ throw new BKException.BKInterruptedException();
+ }
+ }
+
public Collection<BookieSocketAddress> getBookies() throws BKException {
try {
List<String> children = bk.getZkHandle().getChildren(this.bookieRegistrationPath, false);
@@ -148,7 +163,7 @@ class BookieWatcher implements Watcher, ChildrenCallback {
}
}
- Collection<BookieSocketAddress> getReadOnlyBookies() {
+ Collection<BookieSocketAddress> getReadOnlyBookiesAsync() {
return new HashSet<BookieSocketAddress>(readOnlyBookieWatcher.getReadOnlyBookies());
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java
index 5dc849e..5523f6a 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java
@@ -26,7 +26,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Map;
import org.apache.bookkeeper.client.BookKeeperAdmin;
-import org.apache.bookkeeper.conf.ClientConfiguration;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.http.service.HttpEndpointService;
import org.apache.bookkeeper.http.service.HttpServiceRequest;
@@ -71,7 +70,7 @@ public class ListBookiesService implements HttpEndpointService {
params.get("print_hostnames").equals("true");
if (readOnly) {
- bookies.addAll(bka.getReadOnlyBookies());
+ bookies.addAll(bka.getReadOnlyBookiesAsync());
} else {
bookies.addAll(bka.getAvailableBookies());
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java
index 21cde18..5e80f15 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java
@@ -235,4 +235,16 @@ public class BookieServer {
public static void main(String[] args) {
Main.main(args);
}
+
+ @Override
+ public String toString() {
+ String id = "UNKNOWN";
+
+ try {
+ id = Bookie.getBookieAddress(conf).toString();
+ } catch (UnknownHostException e) {
+ //Ignored...
+ }
+ return "Bookie Server listening on " + id;
+ }
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
index 67bde47..ab35b41 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
@@ -429,7 +429,7 @@ public class Auditor implements BookiesListener {
private List<String> getAvailableBookies() throws BKException {
// Get the available bookies
Collection<BookieSocketAddress> availableBkAddresses = admin.getAvailableBookies();
- Collection<BookieSocketAddress> readOnlyBkAddresses = admin.getReadOnlyBookies();
+ Collection<BookieSocketAddress> readOnlyBkAddresses = admin.getReadOnlyBookiesSync();
availableBkAddresses.addAll(readOnlyBkAddresses);
List<String> availableBookies = new ArrayList<String>();
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
index 4dde544..fe6c10b 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
@@ -365,6 +365,11 @@ public class AuditorElector {
return running.get();
}
+ @Override
+ public String toString() {
+ return "AuditorElector for " + bookieId;
+ }
+
/**
* Compare the votes in the ascending order of the sequence number. Vote
* format is 'V_sequencenumber', comparator will do sorting based on the
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
index d3a1415..98960b0 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
@@ -190,7 +190,7 @@ public class ReplicationWorker implements Runnable {
private void waitTillTargetBookieIsWritable() {
LOG.info("Waiting for target bookie {} to be back in read/write mode", targetBookie);
- while (workerRunning && admin.getReadOnlyBookies().contains(targetBookie)) {
+ while (workerRunning && admin.getReadOnlyBookiesAsync().contains(targetBookie)) {
isInReadOnlyMode = true;
waitBackOffTime();
}
@@ -257,7 +257,7 @@ public class ReplicationWorker implements Runnable {
} catch (BKException.BKLedgerRecoveryException e) {
LOG.warn("BKLedgerRecoveryException "
+ "while replicating the fragment", e);
- if (admin.getReadOnlyBookies().contains(targetBookie)) {
+ if (admin.getReadOnlyBookiesAsync().contains(targetBookie)) {
underreplicationManager.releaseUnderreplicatedLedger(ledgerIdToReplicate);
throw new BKException.BKWriteOnReadOnlyBookieException();
}
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java
index 0692d33..6627f82 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java
@@ -40,6 +40,7 @@ import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.bookkeeper.client.AsyncCallback.AddCallback;
import org.apache.bookkeeper.client.BKException;
@@ -168,6 +169,7 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
// grace period for publishing the bk-ledger
LOG.debug("Waiting for ledgers to be marked as under replicated");
+ waitForAuditToComplete();
underReplicaLatch.await(5, TimeUnit.SECONDS);
Map<Long, String> urLedgerData = getUrLedgerData(urLedgerList);
assertEquals("Missed identifying under replicated ledgers", 1,
@@ -296,6 +298,7 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
// grace period for publishing the bk-ledger
LOG.debug("Waiting for Auditor to finish ledger check.");
+ waitForAuditToComplete();
assertFalse("latch should not have completed", underReplicaLatch.await(5, TimeUnit.SECONDS));
}
@@ -313,6 +316,7 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
final CountDownLatch underReplicaLatch = registerUrLedgerWatcher(count);
int bkIndex = bs.size() - 1;
+ LOG.debug("Moving bookie {} {} to read only...", bkIndex, bs.get(bkIndex));
ServerConfiguration bookieConf = bsConfs.get(bkIndex);
BookieServer bk = bs.get(bkIndex);
bookieConf.setReadOnlyModeEnabled(true);
@@ -320,12 +324,14 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
// grace period for publishing the bk-ledger
LOG.debug("Waiting for Auditor to finish ledger check.");
- assertFalse("latch should not have completed", underReplicaLatch.await(5, TimeUnit.SECONDS));
+ waitForAuditToComplete();
+ assertFalse("latch should not have completed", underReplicaLatch.await(1, TimeUnit.SECONDS));
String shutdownBookie = shutdownBookie(bkIndex);
// grace period for publishing the bk-ledger
LOG.debug("Waiting for ledgers to be marked as under replicated");
+ waitForAuditToComplete();
underReplicaLatch.await(5, TimeUnit.SECONDS);
Map<Long, String> urLedgerData = getUrLedgerData(urLedgerList);
assertEquals("Missed identifying under replicated ledgers", 1, urLedgerList.size());
@@ -767,6 +773,20 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
LOG.info("*****************Test Complete");
}
+ private void waitForAuditToComplete() throws Exception {
+ long endTime = System.currentTimeMillis() + 5_000;
+ while (System.currentTimeMillis() < endTime) {
+ Auditor auditor = getAuditorBookiesAuditor();
+ if (auditor != null) {
+ Future<?> task = auditor.submitAuditTask();
+ task.get(5, TimeUnit.SECONDS);
+ return;
+ }
+ Thread.sleep(100);
+ }
+ throw new TimeoutException("Could not find an audit within 5 seconds");
+ }
+
/**
* Wait for ledger to be underreplicated, and to be missing all replicas specified
*/
--
To stop receiving notification emails like this one, please contact
['"commits@bookkeeper.apache.org" <co...@bookkeeper.apache.org>'].