You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@bookkeeper.apache.org by eo...@apache.org on 2019/05/15 06:35:23 UTC
[bookkeeper] branch master updated: [BK-GC] avoid blocking call in
gc-thread
This is an automated email from the ASF dual-hosted git repository.
eolivelli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new f5ddc36 [BK-GC] avoid blocking call in gc-thread
f5ddc36 is described below
commit f5ddc36cc30c5bab5d2e8ebc5fa552c2ad0d6eea
Author: Rajan Dhabalia <rd...@apache.org>
AuthorDate: Tue May 14 23:35:18 2019 -0700
[BK-GC] avoid blocking call in gc-thread
### Motivation
Right now, we have below 3 issues because of which gc thread gets blocked forever and it can't perform gc-task further. Below issues are mainly related to blocking call while doing zk-operation without timeout.
bug-fixes:
1. right now, [GC - ScanAndCompareGarbageCollector](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java#L142) passes timeout in millisecond to [LedgerManager](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java#L166) but it
takes it as second and again try to convert it in millis so, 30Kms timeout becomes [30M ms timeout](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java#L245). Sp, fix timeout unit during gc.
2. Right now, GC makes blocking call to get list of children on ledger znode and sometime zk-call back doesn't comeback which blocks the gc-thread forever. However, recently we added the timeout on the [object-waiting-lock](https://github.com/apache/bookkeeper/blob/master/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java#L243-L248) which doesn't work because it's in while loop and `object.wait(timeout)` completes without any exception and GC threads keep running [...]
3. add zk-timeout during delete ledgers in bookie else it can also block the GC thread.
### Changes
add timeout while bk-gc makes zk-call to verify deleted ledgers.
Reviewers: Enrico Olivelli <eo...@gmail.com>, Sijie Guo <si...@apache.org>, Rajan Dhabalia <rd...@apache.org>
This closes #1940 from rdhabalia/verify_gc
---
.../bookie/ScanAndCompareGarbageCollector.java | 23 ++++++++++++++++------
.../bookkeeper/meta/CleanupLedgerManager.java | 4 ++--
.../apache/bookkeeper/meta/FlatLedgerManager.java | 4 ++--
.../bookkeeper/meta/HierarchicalLedgerManager.java | 6 +++---
.../org/apache/bookkeeper/meta/LedgerManager.java | 4 ++--
.../meta/LegacyHierarchicalLedgerManager.java | 12 +++++------
.../meta/LongHierarchicalLedgerManager.java | 12 +++++------
.../bookkeeper/meta/MSLedgerManagerFactory.java | 2 +-
.../java/org/apache/bookkeeper/util/ZkUtils.java | 12 ++++++++---
.../apache/bookkeeper/bookie/CompactionTest.java | 2 +-
.../client/ParallelLedgerRecoveryTest.java | 4 ++--
.../apache/bookkeeper/meta/MockLedgerManager.java | 2 +-
.../metadata/etcd/EtcdLedgerManager.java | 2 +-
13 files changed, 53 insertions(+), 36 deletions(-)
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
index 247b610..02e47ae 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
@@ -32,6 +32,9 @@ import java.util.SortedMap;
import java.util.TreeSet;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Semaphore;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
import org.apache.bookkeeper.client.BKException;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.meta.LedgerManager;
@@ -139,8 +142,9 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
}
// Iterate over all the ledger on the metadata store
- long zkOpTimeout = this.conf.getZkTimeout() * 2;
- LedgerRangeIterator ledgerRangeIterator = ledgerManager.getLedgerRanges(zkOpTimeout);
+ long zkOpTimeoutMs = this.conf.getZkTimeout() * 2;
+ LedgerRangeIterator ledgerRangeIterator = ledgerManager
+ .getLedgerRanges(zkOpTimeoutMs);
Set<Long> ledgersInMetadata = null;
long start;
long end = -1;
@@ -167,13 +171,20 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
if (verifyMetadataOnGc) {
int rc = BKException.Code.OK;
try {
- result(ledgerManager.readLedgerMetadata(bkLid));
- } catch (BKException e) {
- rc = e.getCode();
+ result(ledgerManager.readLedgerMetadata(bkLid), zkOpTimeoutMs, TimeUnit.MILLISECONDS);
+ } catch (BKException | TimeoutException e) {
+ if (e instanceof BKException) {
+ rc = ((BKException) e).getCode();
+ } else {
+ LOG.warn("Time-out while fetching metadata for Ledger {} : {}.", bkLid,
+ e.getMessage());
+
+ continue;
+ }
}
if (rc != BKException.Code.NoSuchLedgerExistsOnMetadataServerException) {
LOG.warn("Ledger {} Missing in metadata list, but ledgerManager returned rc: {}.",
- bkLid, rc);
+ bkLid, rc);
continue;
}
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
index 3c7ddc5..a3ce432 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/CleanupLedgerManager.java
@@ -210,13 +210,13 @@ public class CleanupLedgerManager implements LedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
closeLock.readLock().lock();
try {
if (closed) {
return new ClosedLedgerRangeIterator();
}
- return underlying.getLedgerRanges(zkOpTimeoutSec);
+ return underlying.getLedgerRanges(zkOpTimeoutMs);
} finally {
closeLock.readLock().unlock();
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
index 8c7f05f..7c89326 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/FlatLedgerManager.java
@@ -89,7 +89,7 @@ class FlatLedgerManager extends AbstractZkLedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeOutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
return new LedgerRangeIterator() {
// single iterator, can visit only one time
boolean nextCalled = false;
@@ -103,7 +103,7 @@ class FlatLedgerManager extends AbstractZkLedgerManager {
try {
zkActiveLedgers = ledgerListToSet(
- ZkUtils.getChildrenInSingleNode(zk, ledgerRootPath, zkOpTimeOutSec),
+ ZkUtils.getChildrenInSingleNode(zk, ledgerRootPath, zkOpTimeoutMs),
ledgerRootPath);
nextRange = new LedgerRange(zkActiveLedgers);
} catch (KeeperException.NoNodeException e) {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
index 079fcfa..9cd6aed 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/HierarchicalLedgerManager.java
@@ -87,9 +87,9 @@ class HierarchicalLedgerManager extends AbstractHierarchicalLedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- LedgerRangeIterator legacyLedgerRangeIterator = legacyLM.getLedgerRanges(zkOpTimeoutSec);
- LedgerRangeIterator longLedgerRangeIterator = longLM.getLedgerRanges(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ LedgerRangeIterator legacyLedgerRangeIterator = legacyLM.getLedgerRanges(zkOpTimeoutMs);
+ LedgerRangeIterator longLedgerRangeIterator = longLM.getLedgerRanges(zkOpTimeoutMs);
return new HierarchicalLedgerRangeIterator(legacyLedgerRangeIterator, longLedgerRangeIterator);
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
index 15da14b..56c447d 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LedgerManager.java
@@ -149,12 +149,12 @@ public interface LedgerManager extends Closeable {
/**
* Loop to scan a range of metadata from metadata storage.
*
- * @param zkOpTimeOutSec
+ * @param zkOpTimeOutMs
* Iterator considers timeout while fetching ledger-range from
* zk.
* @return will return a iterator of the Ranges
*/
- LedgerRangeIterator getLedgerRanges(long zkOpTimeOutSec);
+ LedgerRangeIterator getLedgerRanges(long zkOpTimeOutMs);
/**
* Used to represent the Ledgers range returned from the
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
index 1a63407..9828719 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LegacyHierarchicalLedgerManager.java
@@ -153,8 +153,8 @@ class LegacyHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- return new LegacyHierarchicalLedgerRangeIterator(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ return new LegacyHierarchicalLedgerRangeIterator(zkOpTimeoutMs);
}
/**
@@ -166,10 +166,10 @@ class LegacyHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager
private String curL1Nodes = "";
private boolean iteratorDone = false;
private LedgerRange nextRange = null;
- private final long zkOpTimeoutSec;
+ private final long zkOpTimeoutMs;
- public LegacyHierarchicalLedgerRangeIterator(long zkOpTimeoutSec) {
- this.zkOpTimeoutSec = zkOpTimeoutSec;
+ public LegacyHierarchicalLedgerRangeIterator(long zkOpTimeoutMs) {
+ this.zkOpTimeoutMs = zkOpTimeoutMs;
}
/**
@@ -266,7 +266,7 @@ class LegacyHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager
String nodePath = nodeBuilder.toString();
List<String> ledgerNodes = null;
try {
- ledgerNodes = ZkUtils.getChildrenInSingleNode(zk, nodePath, zkOpTimeoutSec);
+ ledgerNodes = ZkUtils.getChildrenInSingleNode(zk, nodePath, zkOpTimeoutMs);
} catch (KeeperException.NoNodeException e) {
/* If the node doesn't exist, we must have raced with a recursive node removal, just
* return an empty list. */
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
index 95f8e48..7ec2f5a 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/LongHierarchicalLedgerManager.java
@@ -139,8 +139,8 @@ class LongHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- return new LongHierarchicalLedgerRangeIterator(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ return new LongHierarchicalLedgerRangeIterator(zkOpTimeoutMs);
}
@@ -149,7 +149,7 @@ class LongHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager {
*/
private class LongHierarchicalLedgerRangeIterator implements LedgerRangeIterator {
LedgerRangeIterator rootIterator;
- final long zkOpTimeoutSec;
+ final long zkOpTimeoutMs;
/**
* Returns all children with path as a parent. If path is non-existent,
@@ -163,7 +163,7 @@ class LongHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager {
*/
List<String> getChildrenAt(String path) throws IOException {
try {
- List<String> children = ZkUtils.getChildrenInSingleNode(zk, path, zkOpTimeoutSec);
+ List<String> children = ZkUtils.getChildrenInSingleNode(zk, path, zkOpTimeoutMs);
Collections.sort(children);
return children;
} catch (KeeperException.NoNodeException e) {
@@ -285,8 +285,8 @@ class LongHierarchicalLedgerManager extends AbstractHierarchicalLedgerManager {
}
}
- private LongHierarchicalLedgerRangeIterator(long zkOpTimeoutSec) {
- this.zkOpTimeoutSec = zkOpTimeoutSec;
+ private LongHierarchicalLedgerRangeIterator(long zkOpTimeoutMs) {
+ this.zkOpTimeoutMs = zkOpTimeoutMs;
}
private void bootstrap() throws IOException {
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
index 201a0a3..9b21c87 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/meta/MSLedgerManagerFactory.java
@@ -642,7 +642,7 @@ public class MSLedgerManagerFactory extends AbstractZkLedgerManagerFactory {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
return new MSLedgerRangeIterator();
}
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
index 23d0b42..cc0612f 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/util/ZkUtils.java
@@ -25,7 +25,6 @@ import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.bookkeeper.conf.AbstractConfiguration;
@@ -222,7 +221,7 @@ public class ZkUtils {
* @throws InterruptedException
* @throws IOException
*/
- public static List<String> getChildrenInSingleNode(final ZooKeeper zk, final String node, long timeOutSec)
+ public static List<String> getChildrenInSingleNode(final ZooKeeper zk, final String node, long zkOpTimeoutMs)
throws InterruptedException, IOException, KeeperException.NoNodeException {
final GetChildrenCtx ctx = new GetChildrenCtx();
getChildrenInSingleNode(zk, node, new GenericCallback<List<String>>() {
@@ -240,13 +239,20 @@ public class ZkUtils {
});
synchronized (ctx) {
+ long startTime = System.currentTimeMillis();
while (!ctx.done) {
try {
- ctx.wait(timeOutSec > 0 ? TimeUnit.SECONDS.toMillis(timeOutSec) : 0);
+ ctx.wait(zkOpTimeoutMs > 0 ? zkOpTimeoutMs : 0);
} catch (InterruptedException e) {
ctx.rc = Code.OPERATIONTIMEOUT.intValue();
ctx.done = true;
}
+ // timeout the process if get-children response not received
+ // zkOpTimeoutMs.
+ if (zkOpTimeoutMs > 0 && (System.currentTimeMillis() - startTime) >= zkOpTimeoutMs) {
+ ctx.rc = Code.OPERATIONTIMEOUT.intValue();
+ ctx.done = true;
+ }
}
}
if (Code.NONODE.intValue() == ctx.rc) {
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
index 0d6e5d6..adef116 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/CompactionTest.java
@@ -970,7 +970,7 @@ public abstract class CompactionTest extends BookKeeperClusterTestCase {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
final AtomicBoolean hasnext = new AtomicBoolean(true);
return new LedgerManager.LedgerRangeIterator() {
@Override
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
index e6c3c22..177cbe3 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ParallelLedgerRecoveryTest.java
@@ -112,8 +112,8 @@ public class ParallelLedgerRecoveryTest extends BookKeeperClusterTestCase {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
- return lm.getLedgerRanges(zkOpTimeoutSec);
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
+ return lm.getLedgerRanges(zkOpTimeoutMs);
}
@Override
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
index 0f818b6..952837b 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/MockLedgerManager.java
@@ -188,7 +188,7 @@ public class MockLedgerManager implements LedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutSec) {
+ public LedgerRangeIterator getLedgerRanges(long zkOpTimeoutMs) {
return null;
}
diff --git a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
index a40a4bc..f91f2f7 100644
--- a/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
+++ b/metadata-drivers/etcd/src/main/java/org/apache/bookkeeper/metadata/etcd/EtcdLedgerManager.java
@@ -420,7 +420,7 @@ class EtcdLedgerManager implements LedgerManager {
}
@Override
- public LedgerRangeIterator getLedgerRanges(long opTimeOutSec) {
+ public LedgerRangeIterator getLedgerRanges(long opTimeOutMs) {
KeyStream<Long> ks = new KeyStream<>(
kvClient,
ByteSequence.fromString(EtcdUtils.getLedgerKey(scope, 0L)),