You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by eo...@apache.org on 2018/12/20 22:57:00 UTC

[bookkeeper] branch branch-4.8 updated: Issue#1886 Handle double bookie failures

This is an automated email from the ASF dual-hosted git repository.

eolivelli pushed a commit to branch branch-4.8
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/branch-4.8 by this push:
     new c087c52  Issue#1886 Handle double bookie failures
c087c52 is described below

commit c087c52724cec44798a0d49771f01e11f8e7c019
Author: Venkateswararao Jujjuri (JV) <ju...@gmail.com>
AuthorDate: Thu Dec 20 14:56:55 2018 -0800

    Issue#1886 Handle double bookie failures
    
    For this race condition to happen:
    1. ZK metadata version is different from Client write ledger
       handle - Replication worker
    2. Client made an ensemble change and replaced a bookie,
       sent change proposal to zk
    3. While this is pending, Client made another ensemble change
       replaced the same index with the bookie that was prior to step#2
    4. Ensemble change made in step#2 came back to client with
       version conflict error.
    5. Client need to resolve this conflict
    
    The fix is to reconsile the local state, zk state, and do bookie
    replaceemnt, send the updated metadata to zk.
    
    Signed-off-by: Venkateswararao Jujjuri (JV) <vjujjurisalesforce.com>
    (rev Sam Just)
    
    Descriptions of the changes in this PR:
    
    
    
    ### Motivation
    
    (Explain: why you're making that change, what is the problem you're trying to solve)
    
    ### Changes
    
    (Describe: what changes you have made)
    
    Master Issue: #<master-issue-number>
    
    
    
    
    Reviewers: Enrico Olivelli <eo...@gmail.com>, Sijie Guo <si...@apache.org>
    
    This closes #1887 from jvrao/metadata-final, closes #1886
---
 .../org/apache/bookkeeper/client/LedgerHandle.java | 33 ++++++++----
 .../bookkeeper/client/BookieWriteLedgerTest.java   | 61 ++++++++++++++++++++++
 2 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
index 65b3818..245309e 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
@@ -1972,6 +1972,11 @@ public class LedgerHandle implements WriteHandle {
         }
     }
 
+    boolean testStubResolveConflict() {
+        // No test inserts by default
+        return false;
+    }
+
     // Contains newly reformed ensemble, bookieIndex, failedBookieAddress
     static final class EnsembleInfo {
         private final ArrayList<BookieSocketAddress> newEnsemble;
@@ -2064,7 +2069,7 @@ public class LedgerHandle implements WriteHandle {
      */
     private final class ReReadLedgerMetadataCb extends OrderedGenericCallback<LedgerMetadata> {
         private final int rc;
-        private final EnsembleInfo ensembleInfo;
+        private EnsembleInfo ensembleInfo;
         private final int curBlockAddCompletions;
         private final int ensembleChangeIdx;
 
@@ -2113,6 +2118,9 @@ public class LedgerHandle implements WriteHandle {
          */
         private boolean resolveConflict(LedgerMetadata newMeta) {
             LedgerMetadata metadata = getLedgerMetadata();
+            if (testStubResolveConflict()) {
+                LOG.info("Invoked the Stub - Checkout the test case for the expected behavior");
+            }
             if (LOG.isDebugEnabled()) {
                 LOG.debug("[EnsembleChange-L{}-{}] : resolving conflicts - local metadata = \n {} \n,"
                     + " zk metadata = \n {} \n", ledgerId, ensembleChangeIdx, metadata, newMeta);
@@ -2155,12 +2163,7 @@ public class LedgerHandle implements WriteHandle {
             // update the ensemble change metadata again. Otherwise, it means that the ensemble change
             // is already succeed, unset the success and re-adding entries.
             if (!areFailedBookiesReplaced(newMeta, ensembleInfo)) {
-                // If the in-memory data doesn't contains the failed bookie, it means the ensemble change
-                // didn't finish, so try to resolve conflicts with the metadata read from zookeeper and
-                // update ensemble changed metadata again.
-                if (areFailedBookiesReplaced(metadata, ensembleInfo)) {
-                    return updateMetadataIfPossible(metadata, newMeta);
-                }
+                return updateMetadataIfPossible(metadata, newMeta);
             } else {
                 ensembleChangeCounter.inc();
                 // We've successfully changed an ensemble
@@ -2222,9 +2225,21 @@ public class LedgerHandle implements WriteHandle {
             metadata.setVersion(newMeta.getVersion());
             // merge ensemble infos from new meta except last ensemble
             // since they might be modified by recovery tool.
+
             metadata.mergeEnsembles(newMeta.getEnsembles());
-            writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions,
-                    ensembleChangeIdx));
+            if (!areFailedBookiesReplaced(metadata, ensembleInfo)) {
+                // If the in-memory data contains the failed bookie, someone else
+                // merged the metadata and reinstated old copy. Let's attempt to
+                // replace the bookie again.
+                try {
+                    ensembleInfo = replaceBookieInMetadata(ensembleInfo.failedBookies, numEnsembleChanges.get());
+                } catch (BKException.BKNotEnoughBookiesException e) {
+                    LOG.error("Could not get additional bookie to remake ensemble, closing ledger: {}", ledgerId);
+                    return false;
+                }
+            }
+
+            writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions, ensembleChangeIdx));
             return true;
         }
 
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java
index 85376ea..7127a35 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java
@@ -24,10 +24,13 @@ import static org.apache.bookkeeper.client.BookKeeperClientStats.ADD_OP;
 import static org.apache.bookkeeper.client.BookKeeperClientStats.ADD_OP_UR;
 import static org.apache.bookkeeper.client.BookKeeperClientStats.CLIENT_SCOPE;
 import static org.apache.bookkeeper.client.BookKeeperClientStats.READ_OP_DM;
+import static org.apache.bookkeeper.common.concurrent.FutureUtils.result;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.when;
 
 import com.google.common.collect.Lists;
 import io.netty.buffer.AbstractByteBufAllocator;
@@ -62,7 +65,10 @@ import org.apache.bookkeeper.client.api.WriteAdvHandle;
 import org.apache.bookkeeper.conf.ServerConfiguration;
 import org.apache.bookkeeper.meta.LongHierarchicalLedgerManagerFactory;
 import org.apache.bookkeeper.net.BookieSocketAddress;
+import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.GenericCallback;
 import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
+import org.apache.bookkeeper.test.TestCallbacks.GenericCallbackFuture;
+import org.apache.bookkeeper.versioning.LongVersion;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.zookeeper.KeeperException;
 import org.junit.Assert;
@@ -170,6 +176,61 @@ public class BookieWriteLedgerTest extends
         lh.close();
     }
 
+    /*
+     * Verify that a double ensemble change with version conflict.
+     * Steps:
+     * 1. ZK metadata version is different from Client write ledger handle - Replication worker
+     * 2. Client made an ensemble change and replaced a bookie, sent change proposal to zk
+     * 3. While this is pending, Client made another ensemble change replaced the same index
+     *    with the bookie that was prior to step#2
+     * 4. Ensemble change made in step#2 came back to client with version conflict error.
+     * 5. Client need to resolve this conflict.
+     */
+    @Test
+    public void testSameIndexFailure() throws Exception {
+        lh = bkc.createLedger(5, 5, digestType, ledgerPassword);
+        LedgerHandle mlh = spy(lh);
+        LOG.info("Ledger ID: " + mlh.getId());
+        for (int i = 0; i < numEntriesToWrite; i++) {
+           ByteBuffer entry = ByteBuffer.allocate(4);
+           entry.putInt(rng.nextInt(maxInt));
+           entry.position(0);
+           entries1.add(entry.array());
+           mlh.addEntry(entry.array());
+        }
+        // Change the metadata version on ZK.
+        LedgerMetadata myMetadata = new LedgerMetadata(mlh.getLedgerMetadata());
+        // Set it in Metadata server
+        long savedVersion = ((LongVersion) mlh.getLedgerMetadata().getVersion()).getLongVersion();
+        GenericCallbackFuture<LedgerMetadata> callbackFuture = new GenericCallbackFuture<>();
+        bkc.getLedgerManager().writeLedgerMetadata(mlh.getId(), myMetadata, callbackFuture);
+        callbackFuture.get();
+        // Set the saved version back
+        lh.getLedgerMetadata().setVersion(new LongVersion(savedVersion));
+        // Add a bookie
+        BookieSocketAddress newBkAddr = startNewBookieAndReturnAddress();
+        // Put a old bookie to sleep
+        CountDownLatch sleepLatch1 = new CountDownLatch(1);
+        List<BookieSocketAddress> ensemble = mlh.getLedgerMetadata()
+                .getEnsembles().entrySet().iterator().next().getValue();
+        sleepBookie(ensemble.get(0), sleepLatch1);
+        Map<Integer, BookieSocketAddress> injectFailedBookies =
+                new HashMap<Integer, BookieSocketAddress>();
+        injectFailedBookies.put(0, newBkAddr);
+        when(mlh.testStubResolveConflict()).thenAnswer(
+                invoke -> {
+                    mlh.replaceBookieInMetadata(injectFailedBookies, 0);
+                    return true;
+                });
+
+        // Make a new write
+        ByteBuffer entry = ByteBuffer.allocate(4);
+        entry.putInt(rng.nextInt(maxInt));
+        entry.position(0);
+        entries1.add(entry.array());
+        mlh.addEntry(entry.array());
+        sleepLatch1.countDown();
+    }
     /**
      * Verify write and Read durability stats.
      */