You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by zh...@apache.org on 2017/10/17 07:33:03 UTC

[bookkeeper] branch master updated: BOOKKEEPER-1062: Fixed the race in AuditorLedgerCheckerTest

This is an automated email from the ASF dual-hosted git repository.

zhaijia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new 40a7f1d  BOOKKEEPER-1062: Fixed the race in AuditorLedgerCheckerTest
40a7f1d is described below

commit 40a7f1de440e6aec0b6640fc34a0ec7460c29ab7
Author: Robert Evans <ev...@yahoo-inc.com>
AuthorDate: Tue Oct 17 15:32:47 2017 +0800

    BOOKKEEPER-1062: Fixed the race in AuditorLedgerCheckerTest
    
    Descriptions of the changes in this PR:
    
    This fixes the race condition in the auditor between read only bookies and available bookies.  But because I am not sure of the performance impacts to modifying getReadOnlyBookies, I have opted to add in a new getROBookies.  But I am happy to change it.  This pull request is mostly to show my work and see what others think.
    
    Author: Robert Evans <ev...@yahoo-inc.com>
    
    Reviewers: Enrico Olivelli <eo...@gmail.com>, Jia Zhai <None>, Sijie Guo <si...@apache.org>
    
    This closes #556 from revans2/BOOKKEEPER-1062
---
 .../org/apache/bookkeeper/bookie/BookieShell.java  |  2 +-
 .../apache/bookkeeper/client/BookKeeperAdmin.java  | 18 ++++++++++++++----
 .../apache/bookkeeper/client/BookieInfoReader.java |  2 +-
 .../apache/bookkeeper/client/BookieWatcher.java    | 17 ++++++++++++++++-
 .../apache/bookkeeper/http/ListBookiesService.java |  3 +--
 .../org/apache/bookkeeper/proto/BookieServer.java  | 12 ++++++++++++
 .../org/apache/bookkeeper/replication/Auditor.java |  2 +-
 .../bookkeeper/replication/AuditorElector.java     |  5 +++++
 .../bookkeeper/replication/ReplicationWorker.java  |  4 ++--
 .../replication/AuditorLedgerCheckerTest.java      | 22 +++++++++++++++++++++-
 10 files changed, 74 insertions(+), 13 deletions(-)

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
index f4e943d..1c08eb2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookieShell.java
@@ -1125,7 +1125,7 @@ public class BookieShell implements Tool {
                 bookies.addAll(availableBookies);
             } else if (cmdLine.hasOption("ro")) {
                 Collection<BookieSocketAddress> roBookies = bka
-                        .getReadOnlyBookies();
+                        .getReadOnlyBookiesAsync();
                 bookies.addAll(roBookies);
             }
             for (BookieSocketAddress b : bookies) {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
index e0bea68..a0d2fe2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
@@ -231,12 +231,22 @@ public class BookKeeperAdmin implements AutoCloseable {
     }
 
     /**
-     * Get a list of readonly bookies
+     * Get a list of readonly bookies synchronously.
      *
      * @return a collection of bookie addresses
+     * @throws BKException if there are issues trying to read the list.
      */
-    public Collection<BookieSocketAddress> getReadOnlyBookies() {
-        return bkc.bookieWatcher.getReadOnlyBookies();
+    public Collection<BookieSocketAddress> getReadOnlyBookiesSync() throws BKException {
+        return bkc.bookieWatcher.getReadOnlyBookiesSync();
+    }
+
+    /**
+     * Get a list of readonly bookies asynchronously (may be slightly out of date).
+     *
+     * @return a collection of bookie addresses
+     */
+    public Collection<BookieSocketAddress> getReadOnlyBookiesAsync() {
+        return bkc.bookieWatcher.getReadOnlyBookiesAsync();
     }
 
     /**
@@ -1186,7 +1196,7 @@ public class BookKeeperAdmin implements AutoCloseable {
     public void decommissionBookie(BookieSocketAddress bookieAddress)
             throws CompatibilityException, UnavailableException, KeeperException, InterruptedException, IOException,
             BKAuditException, TimeoutException, BKException {
-        if (getAvailableBookies().contains(bookieAddress) || getReadOnlyBookies().contains(bookieAddress)) {
+        if (getAvailableBookies().contains(bookieAddress) || getReadOnlyBookiesAsync().contains(bookieAddress)) {
             LOG.error("Bookie: {} is not shutdown yet", bookieAddress);
             throw BKException.create(BKException.Code.IllegalOpException);
         }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java
index 399af6b..70fe2bd 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieInfoReader.java
@@ -402,7 +402,7 @@ public class BookieInfoReader {
 
         Collection<BookieSocketAddress> bookies;
         bookies = bk.bookieWatcher.getBookies();
-        bookies.addAll(bk.bookieWatcher.getReadOnlyBookies());
+        bookies.addAll(bk.bookieWatcher.getReadOnlyBookiesAsync());
 
         totalSent.set(bookies.size());
         for (BookieSocketAddress b : bookies) {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java
index 74fe089..b11271f 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookieWatcher.java
@@ -133,6 +133,21 @@ class BookieWatcher implements Watcher, ChildrenCallback {
         readOnlyBookieWatcher.notifyBookiesChanged(listener);
     }
 
+    public Collection<BookieSocketAddress> getReadOnlyBookiesSync() throws BKException {
+        try {
+            String znode = this.bookieRegistrationPath + "/" + BookKeeperConstants.READONLY;
+            List<String> children = bk.getZkHandle().getChildren(znode, false);
+            return convertToBookieAddresses(children);
+        } catch (KeeperException ke) {
+            logger.error("Failed to get read only bookie list : ", ke);
+            throw new BKException.ZKException();
+        } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            logger.error("Interrupted reading read only bookie list", ie);
+            throw new BKException.BKInterruptedException();
+        }
+    }
+
     public Collection<BookieSocketAddress> getBookies() throws BKException {
         try {
             List<String> children = bk.getZkHandle().getChildren(this.bookieRegistrationPath, false);
@@ -148,7 +163,7 @@ class BookieWatcher implements Watcher, ChildrenCallback {
         }
     }
 
-    Collection<BookieSocketAddress> getReadOnlyBookies() {
+    Collection<BookieSocketAddress> getReadOnlyBookiesAsync() {
         return new HashSet<BookieSocketAddress>(readOnlyBookieWatcher.getReadOnlyBookies());
     }
 
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java
index 5dc849e..5523f6a 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/http/ListBookiesService.java
@@ -26,7 +26,6 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Map;
 import org.apache.bookkeeper.client.BookKeeperAdmin;
-import org.apache.bookkeeper.conf.ClientConfiguration;
 import org.apache.bookkeeper.conf.ServerConfiguration;
 import org.apache.bookkeeper.http.service.HttpEndpointService;
 import org.apache.bookkeeper.http.service.HttpServiceRequest;
@@ -71,7 +70,7 @@ public class ListBookiesService implements HttpEndpointService {
               params.get("print_hostnames").equals("true");
 
             if (readOnly) {
-                bookies.addAll(bka.getReadOnlyBookies());
+                bookies.addAll(bka.getReadOnlyBookiesAsync());
             } else {
                 bookies.addAll(bka.getAvailableBookies());
             }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java
index 21cde18..5e80f15 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/proto/BookieServer.java
@@ -235,4 +235,16 @@ public class BookieServer {
     public static void main(String[] args) {
         Main.main(args);
     }
+
+    @Override
+    public  String toString() {
+        String id = "UNKNOWN";
+
+        try {
+            id = Bookie.getBookieAddress(conf).toString();
+        } catch (UnknownHostException e) {
+            //Ignored...
+        }
+        return "Bookie Server listening on " + id;
+    }
 }
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
index 67bde47..ab35b41 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
@@ -429,7 +429,7 @@ public class Auditor implements BookiesListener {
     private List<String> getAvailableBookies() throws BKException {
         // Get the available bookies
         Collection<BookieSocketAddress> availableBkAddresses = admin.getAvailableBookies();
-        Collection<BookieSocketAddress> readOnlyBkAddresses = admin.getReadOnlyBookies();
+        Collection<BookieSocketAddress> readOnlyBkAddresses = admin.getReadOnlyBookiesSync();
         availableBkAddresses.addAll(readOnlyBkAddresses);
 
         List<String> availableBookies = new ArrayList<String>();
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
index 4dde544..fe6c10b 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
@@ -365,6 +365,11 @@ public class AuditorElector {
         return running.get();
     }
 
+    @Override
+    public String toString() {
+        return "AuditorElector for " + bookieId;
+    }
+
     /**
      * Compare the votes in the ascending order of the sequence number. Vote
      * format is 'V_sequencenumber', comparator will do sorting based on the
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
index d3a1415..98960b0 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
@@ -190,7 +190,7 @@ public class ReplicationWorker implements Runnable {
 
     private void waitTillTargetBookieIsWritable() {
         LOG.info("Waiting for target bookie {} to be back in read/write mode", targetBookie);
-        while (workerRunning && admin.getReadOnlyBookies().contains(targetBookie)) {
+        while (workerRunning && admin.getReadOnlyBookiesAsync().contains(targetBookie)) {
             isInReadOnlyMode = true;
             waitBackOffTime();
         }
@@ -257,7 +257,7 @@ public class ReplicationWorker implements Runnable {
                 } catch (BKException.BKLedgerRecoveryException e) {
                     LOG.warn("BKLedgerRecoveryException "
                             + "while replicating the fragment", e);
-                    if (admin.getReadOnlyBookies().contains(targetBookie)) {
+                    if (admin.getReadOnlyBookiesAsync().contains(targetBookie)) {
                         underreplicationManager.releaseUnderreplicatedLedger(ledgerIdToReplicate);
                         throw new BKException.BKWriteOnReadOnlyBookieException();
                     }
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java
index 0692d33..6627f82 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/AuditorLedgerCheckerTest.java
@@ -40,6 +40,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.bookkeeper.client.AsyncCallback.AddCallback;
 import org.apache.bookkeeper.client.BKException;
@@ -168,6 +169,7 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
 
         // grace period for publishing the bk-ledger
         LOG.debug("Waiting for ledgers to be marked as under replicated");
+        waitForAuditToComplete();
         underReplicaLatch.await(5, TimeUnit.SECONDS);
         Map<Long, String> urLedgerData = getUrLedgerData(urLedgerList);
         assertEquals("Missed identifying under replicated ledgers", 1,
@@ -296,6 +298,7 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
 
         // grace period for publishing the bk-ledger
         LOG.debug("Waiting for Auditor to finish ledger check.");
+        waitForAuditToComplete();
         assertFalse("latch should not have completed", underReplicaLatch.await(5, TimeUnit.SECONDS));
     }
 
@@ -313,6 +316,7 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
         final CountDownLatch underReplicaLatch = registerUrLedgerWatcher(count);
 
         int bkIndex = bs.size() - 1;
+        LOG.debug("Moving bookie {} {} to read only...", bkIndex, bs.get(bkIndex));
         ServerConfiguration bookieConf = bsConfs.get(bkIndex);
         BookieServer bk = bs.get(bkIndex);
         bookieConf.setReadOnlyModeEnabled(true);
@@ -320,12 +324,14 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
 
         // grace period for publishing the bk-ledger
         LOG.debug("Waiting for Auditor to finish ledger check.");
-        assertFalse("latch should not have completed", underReplicaLatch.await(5, TimeUnit.SECONDS));
+        waitForAuditToComplete();
+        assertFalse("latch should not have completed", underReplicaLatch.await(1, TimeUnit.SECONDS));
 
         String shutdownBookie = shutdownBookie(bkIndex);
 
         // grace period for publishing the bk-ledger
         LOG.debug("Waiting for ledgers to be marked as under replicated");
+        waitForAuditToComplete();
         underReplicaLatch.await(5, TimeUnit.SECONDS);
         Map<Long, String> urLedgerData = getUrLedgerData(urLedgerList);
         assertEquals("Missed identifying under replicated ledgers", 1, urLedgerList.size());
@@ -767,6 +773,20 @@ public class AuditorLedgerCheckerTest extends BookKeeperClusterTestCase {
         LOG.info("*****************Test Complete");
     }
 
+    private void waitForAuditToComplete() throws Exception {
+        long endTime = System.currentTimeMillis() + 5_000;
+        while (System.currentTimeMillis() < endTime) {
+            Auditor auditor = getAuditorBookiesAuditor();
+            if (auditor != null) {
+                Future<?> task = auditor.submitAuditTask();
+                task.get(5, TimeUnit.SECONDS);
+                return;
+            }
+            Thread.sleep(100);
+        }
+        throw new TimeoutException("Could not find an audit within 5 seconds");
+    }
+
     /**
      * Wait for ledger to be underreplicated, and to be missing all replicas specified
      */

-- 
To stop receiving notification emails like this one, please contact
['"commits@bookkeeper.apache.org" <co...@bookkeeper.apache.org>'].