You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@bookkeeper.apache.org by GitBox <gi...@apache.org> on 2018/08/21 18:42:25 UTC

[GitHub] sijie closed pull request #1608: Issue 1578: Fixed deadlock in auditor blocking ZK thread

sijie closed pull request #1608: Issue 1578: Fixed deadlock in auditor blocking ZK thread
URL: https://github.com/apache/bookkeeper/pull/1608
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

Because this pull request comes from a fork, GitHub no longer displays its
diff once it has been merged, so the diff is reproduced below:

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
index 15868d957f..8578a5b148 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
@@ -32,6 +32,7 @@
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executors;
+import java.util.concurrent.ForkJoinPool;
 import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ThreadFactory;
@@ -640,47 +641,51 @@ public void process(final Long ledgerId,
                         return;
                     }
 
-                    LedgerHandle lh = null;
-                    try {
-                        lh = admin.openLedgerNoRecovery(ledgerId);
-                        checker.checkLedger(lh,
-                            new ProcessLostFragmentsCb(lh, callback),
-                            conf.getAuditorLedgerVerificationPercentage());
-                        // we collect the following stats to get a measure of the
-                        // distribution of a single ledger within the bk cluster
-                        // the higher the number of fragments/bookies, the more distributed it is
-                        numFragmentsPerLedger.registerSuccessfulValue(lh.getNumFragments());
-                        numBookiesPerLedger.registerSuccessfulValue(lh.getNumBookies());
-                        numLedgersChecked.inc();
-                    } catch (BKException.BKNoSuchLedgerExistsException bknsle) {
-                        if (LOG.isDebugEnabled()) {
-                            LOG.debug("Ledger was deleted before we could check it", bknsle);
-                        }
-                        callback.processResult(BKException.Code.OK,
-                                               null, null);
-                        return;
-                    } catch (BKException bke) {
-                        LOG.error("Couldn't open ledger " + ledgerId, bke);
-                        callback.processResult(BKException.Code.BookieHandleNotAvailableException,
-                                         null, null);
-                        return;
-                    } catch (InterruptedException ie) {
-                        LOG.error("Interrupted opening ledger", ie);
-                        Thread.currentThread().interrupt();
-                        callback.processResult(BKException.Code.InterruptedException, null, null);
-                        return;
-                    } finally {
-                        if (lh != null) {
-                            try {
-                                lh.close();
-                            } catch (BKException bke) {
-                                LOG.warn("Couldn't close ledger " + ledgerId, bke);
-                            } catch (InterruptedException ie) {
-                                LOG.warn("Interrupted closing ledger " + ledgerId, ie);
-                                Thread.currentThread().interrupt();
+                    // Do not perform blocking calls that involve making ZK calls from within the ZK
+                    // event thread. Jump to background thread instead to avoid deadlock.
+                    ForkJoinPool.commonPool().execute(() -> {
+                        LedgerHandle lh = null;
+                        try {
+                            lh = admin.openLedgerNoRecovery(ledgerId);
+                            checker.checkLedger(lh,
+                                    new ProcessLostFragmentsCb(lh, callback),
+                                    conf.getAuditorLedgerVerificationPercentage());
+                            // we collect the following stats to get a measure of the
+                            // distribution of a single ledger within the bk cluster
+                            // the higher the number of fragments/bookies, the more distributed it is
+                            numFragmentsPerLedger.registerSuccessfulValue(lh.getNumFragments());
+                            numBookiesPerLedger.registerSuccessfulValue(lh.getNumBookies());
+                            numLedgersChecked.inc();
+                        } catch (BKException.BKNoSuchLedgerExistsException bknsle) {
+                            if (LOG.isDebugEnabled()) {
+                                LOG.debug("Ledger was deleted before we could check it", bknsle);
+                            }
+                            callback.processResult(BKException.Code.OK,
+                                    null, null);
+                            return;
+                        } catch (BKException bke) {
+                            LOG.error("Couldn't open ledger " + ledgerId, bke);
+                            callback.processResult(BKException.Code.BookieHandleNotAvailableException,
+                                    null, null);
+                            return;
+                        } catch (InterruptedException ie) {
+                            LOG.error("Interrupted opening ledger", ie);
+                            Thread.currentThread().interrupt();
+                            callback.processResult(BKException.Code.InterruptedException, null, null);
+                            return;
+                        } finally {
+                            if (lh != null) {
+                                try {
+                                    lh.close();
+                                } catch (BKException bke) {
+                                    LOG.warn("Couldn't close ledger " + ledgerId, bke);
+                                } catch (InterruptedException ie) {
+                                    LOG.warn("Interrupted closing ledger " + ledgerId, ie);
+                                    Thread.currentThread().interrupt();
+                                }
                             }
                         }
-                    }
+                    });
                 }
             };
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services