You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@bookkeeper.apache.org by GitBox <gi...@apache.org> on 2018/08/13 13:00:19 UTC

[GitHub] ivankelly closed pull request #1592: Delayed write ensemble change may cause dataloss

ivankelly closed pull request #1592: Delayed write ensemble change may cause dataloss
URL: https://github.com/apache/bookkeeper/pull/1592
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
index 611b1827c3..a646651a61 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
@@ -1867,48 +1867,15 @@ EnsembleInfo replaceBookieInMetadata(final Map<Integer, BookieSocketAddress> fai
     }
 
     void handleDelayedWriteBookieFailure() {
-        int curBlockAddCompletions = blockAddCompletions.get();
-        if (bk.getDisableEnsembleChangeFeature().isAvailable()) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Ensemble change is disabled. Failed bookies {} for ledger {}.",
-                        delayedWriteFailedBookies, ledgerId);
-            }
-            return;
-        }
-        int curNumEnsembleChanges = numEnsembleChanges.incrementAndGet();
-        if (curNumEnsembleChanges > maxAllowedEnsembleChanges) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Exceeding maxAllowedEnsembeChanges {}. Failed bookies {} for ledger {}.",
-                        maxAllowedEnsembleChanges, delayedWriteFailedBookies, ledgerId);
-            }
-            return;
-        }
-        if (writeFlags.contains(WriteFlag.DEFERRED_SYNC)) {
-            if (LOG.isDebugEnabled()) {
-                LOG.debug("Cannot perform ensemble change with writeflags {}."
-                        + "Failed bookies {} for ledger {}.",
-                        writeFlags, delayedWriteFailedBookies, ledgerId);
-            }
-            return;
-        }
-        LedgerMetadata metadata = getLedgerMetadata();
-        synchronized (metadata) {
-            try {
-                EnsembleInfo ensembleInfo = replaceBookieInMetadata(delayedWriteFailedBookies, curNumEnsembleChanges);
-                if (ensembleInfo.replacedBookies.isEmpty()) {
-                    return;
-                }
-                if (LOG.isDebugEnabled()) {
-                    LOG.debug("[EnsembleChange-L{}-{}] : writing new ensemble info = {}",
-                            getId(), curNumEnsembleChanges, ensembleInfo);
-                }
-                writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions,
-                        curNumEnsembleChanges, false));
-            } catch (BKException.BKNotEnoughBookiesException e) {
-                LOG.error("Could not get additional bookie to remake ensemble: {}", ledgerId);
-            }
-            delayedWriteFailedBookies.clear();
-        }
+        final Map<Integer, BookieSocketAddress> copyDelayedWriteFailedBookies =
+                new HashMap<Integer, BookieSocketAddress>(delayedWriteFailedBookies);
+        delayedWriteFailedBookies.clear();
+
+        // Original intent of this change is to do a best-effort ensemble change.
+        // But this is not possible until the local metadata is completely immutable.
+        // Until the feature "Make LedgerMetadata Immutable #610" Is complete we will use
+        // handleBookieFailure() to handle delayed writes as regular bookie failures.
+        handleBookieFailure(copyDelayedWriteFailedBookies);
     }
 
     void handleBookieFailure(final Map<Integer, BookieSocketAddress> failedBookies) {
@@ -1958,7 +1925,7 @@ void handleBookieFailure(final Map<Integer, BookieSocketAddress> failedBookies)
                             getId(), curNumEnsembleChanges, ensembleInfo, curBlockAddCompletions);
                 }
                 writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions,
-                        curNumEnsembleChanges, true));
+                        curNumEnsembleChanges));
                 // clear if there are any delayed write failures were recorded.
                 delayedWriteFailedBookies.clear();
             } catch (BKException.BKNotEnoughBookiesException e) {
@@ -2002,17 +1969,14 @@ public String toString() {
         private final EnsembleInfo ensembleInfo;
         private final int curBlockAddCompletions;
         private final int ensembleChangeIdx;
-        private final boolean addEntryFailureRecovery;
 
         ChangeEnsembleCb(EnsembleInfo ensembleInfo,
                          int curBlockAddCompletions,
-                         int ensembleChangeIdx,
-                         boolean addEntryFailureRecovery) {
+                         int ensembleChangeIdx) {
             super(bk.getMainWorkerPool(), ledgerId);
             this.ensembleInfo = ensembleInfo;
             this.curBlockAddCompletions = curBlockAddCompletions;
             this.ensembleChangeIdx = ensembleChangeIdx;
-            this.addEntryFailureRecovery = addEntryFailureRecovery;
         }
 
         @Override
@@ -2033,17 +1997,11 @@ public void safeOperationComplete(final int rc, LedgerMetadata writtenMetadata)
             } else if (rc != BKException.Code.OK) {
                 LOG.error("[EnsembleChange-L{}-{}] : could not persist ledger metadata : info = {}, "
                         + "closing ledger : {}.", getId(), ensembleChangeIdx, ensembleInfo, rc);
-                if (addEntryFailureRecovery) {
-                    handleUnrecoverableErrorDuringAdd(rc);
-                }
+                handleUnrecoverableErrorDuringAdd(rc);
                 return;
             }
-            int newBlockAddCompletions;
-            if (addEntryFailureRecovery) {
-                newBlockAddCompletions = blockAddCompletions.decrementAndGet();
-            } else {
-                newBlockAddCompletions = blockAddCompletions.get();
-            }
+            int newBlockAddCompletions = blockAddCompletions.decrementAndGet();
+
 
             if (LOG.isDebugEnabled()) {
                 LOG.info("[EnsembleChange-L{}-{}] : completed ensemble change, block add completion {} => {}",
@@ -2054,10 +2012,8 @@ public void safeOperationComplete(final int rc, LedgerMetadata writtenMetadata)
             ensembleChangeCounter.inc();
             LOG.info("New Ensemble: {} for ledger: {}", ensembleInfo.newEnsemble, ledgerId);
 
-            if (addEntryFailureRecovery) {
-                // the failed bookie has been replaced
-                unsetSuccessAndSendWriteRequest(ensembleInfo.replacedBookies);
-            }
+            // the failed bookie has been replaced
+            unsetSuccessAndSendWriteRequest(ensembleInfo.replacedBookies);
         }
 
         @Override
@@ -2232,7 +2188,7 @@ private boolean updateMetadataIfPossible(LedgerMetadata metadata, LedgerMetadata
             // since they might be modified by recovery tool.
             metadata.mergeEnsembles(newMeta.getEnsembles());
             writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo, curBlockAddCompletions,
-                    ensembleChangeIdx, true));
+                    ensembleChangeIdx));
             return true;
         }
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services