You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by si...@apache.org on 2018/02/19 07:54:25 UTC
[bookkeeper] branch master updated: ISSUE #1139: Add debug to
replication fencing
This is an automated email from the ASF dual-hosted git repository.
sijie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 546eed4 ISSUE #1139: Add debug to replication fencing
546eed4 is described below
commit 546eed4913e148a8a4f075ee412d1abe28398ffa
Author: JV Jujjuri <vj...@salesforce.com>
AuthorDate: Sun Feb 18 23:54:18 2018 -0800
ISSUE #1139: Add debug to replication fencing
Descriptions of the changes in this PR:
When a ledger is fenced, the client may get a write error.
Not having enough logging in this area makes debugging harder.
Optimised the code in addition to adding more logging in this area.
Signed-off-by: Venkateswararao Jujjuri (JV) <vjujjurisalesforce.com>
Master Issue: #1139
Author: JV Jujjuri <vj...@salesforce.com>
Reviewers: Sijie Guo <si...@apache.org>
This closes #1140 from jvrao/bk-issue-1139, closes #1139
---
.../bookkeeper/replication/ReplicationWorker.java | 50 ++++++++++++++--------
1 file changed, 33 insertions(+), 17 deletions(-)
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
index 65c0c2c..f6ed7d1 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
@@ -339,6 +339,10 @@ public class ReplicationWorker implements Runnable {
Collection<BookieSocketAddress> available = admin.getAvailableBookies();
for (BookieSocketAddress b : finalEnsemble) {
if (!available.contains(b)) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Bookie {} is missing from the list of Available Bookies. ledger {}:ensemble {}.",
+ b, lh.getId(), finalEnsemble);
+ }
return true;
}
}
@@ -366,32 +370,45 @@ public class ReplicationWorker implements Runnable {
TimerTask timerTask = new TimerTask() {
@Override
public void run() {
+ boolean isRecoveryOpen = false;
LedgerHandle lh = null;
try {
lh = admin.openLedgerNoRecovery(ledgerId);
if (isLastSegmentOpenAndMissingBookies(lh)) {
+ // Need recovery open, close the old ledger handle.
+ lh.close();
+ // Recovery open could result in client write failure.
+ LOG.warn("Missing bookie(s) from last segment. Opening Ledger{} for Recovery.", ledgerId);
lh = admin.openLedger(ledgerId);
+ isRecoveryOpen = true;
}
-
- Set<LedgerFragment> fragments =
- getUnderreplicatedFragments(lh, conf.getAuditorLedgerVerificationPercentage());
- for (LedgerFragment fragment : fragments) {
- if (!fragment.isClosed()) {
- lh = admin.openLedger(ledgerId);
- break;
+ if (!isRecoveryOpen){
+ Set<LedgerFragment> fragments =
+ getUnderreplicatedFragments(lh, conf.getAuditorLedgerVerificationPercentage());
+ for (LedgerFragment fragment : fragments) {
+ if (!fragment.isClosed()) {
+ // Need recovery open, close the old ledger handle.
+ lh.close();
+ // Recovery open could result in client write failure.
+ LOG.warn("Open Fragment{}. Opening Ledger{} for Recovery.",
+ fragment.getEnsemble(), ledgerId);
+ lh = admin.openLedger(ledgerId);
+ isRecoveryOpen = true;
+ break;
+ }
}
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- LOG.info("InterruptedException "
- + "while replicating fragments", e);
+ LOG.info("InterruptedException while fencing the ledger {}"
+ + " for rereplication of postponed ledgers", ledgerId, e);
} catch (BKNoSuchLedgerExistsException bknsle) {
if (LOG.isDebugEnabled()) {
- LOG.debug("Ledger was deleted, safe to continue", bknsle);
+ LOG.debug("Ledger {} was deleted, safe to continue", ledgerId, bknsle);
}
} catch (BKException e) {
- LOG.error("BKException while fencing the ledger"
- + " for rereplication of postponed ledgers", e);
+ LOG.error("BKException while fencing the ledger {}"
+ + " for rereplication of postponed ledgers", ledgerId, e);
} finally {
try {
if (lh != null) {
@@ -399,20 +416,19 @@ public class ReplicationWorker implements Runnable {
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
- LOG.info("InterruptedException while closing "
- + "ledger", e);
+ LOG.info("InterruptedException while closing ledger {}", ledgerId, e);
} catch (BKException e) {
// Lets go ahead and release the lock. Catch actual
// exception in normal replication flow and take
// action.
- LOG.warn("BKException while closing ledger ", e);
+ LOG.warn("BKException while closing ledger {} ", ledgerId, e);
} finally {
try {
underreplicationManager
.releaseUnderreplicatedLedger(ledgerId);
} catch (UnavailableException e) {
- LOG.error("UnavailableException "
- + "while replicating fragments", e);
+ LOG.error("UnavailableException while replicating fragments of ledger {}",
+ ledgerId, e);
shutdown();
}
}
--
To stop receiving notification emails like this one, please contact
sijie@apache.org.