You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by el...@apache.org on 2020/01/14 00:03:46 UTC
[hbase] branch branch-2 updated: HBASE-23679 FileSystem objects
leak when cleaned up in cleanupBulkLoad
This is an automated email from the ASF dual-hosted git repository.
elserj pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 4bf7fb8 HBASE-23679 FileSystem objects leak when cleaned up in cleanupBulkLoad
4bf7fb8 is described below
commit 4bf7fb861377d5f4e0dd50fc6375025e91ad1ec2
Author: Josh Elser <el...@apache.org>
AuthorDate: Mon Jan 13 18:30:30 2020 -0500
HBASE-23679 FileSystem objects leak when cleaned up in cleanupBulkLoad
The cleanupBulkLoad method is only called for the first Region in the
table which was being bulk loaded into. This means that potentially N-1
other RegionServers (where N is the number of RegionServers) will leak
one FileSystem object into the FileSystem cache which will never be
cleaned up. We need to do this clean-up as a part of secureBulkLoadHFiles;
otherwise, we cannot guarantee that heap usage won't grow unbounded.
Closes #1029
Signed-off-by: Sean Busbey <bu...@apache.org>
---
.../hbase/regionserver/SecureBulkLoadManager.java | 37 ++++++++++++----------
1 file changed, 20 insertions(+), 17 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
index d0f3943..ff9b229 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/SecureBulkLoadManager.java
@@ -152,26 +152,15 @@ public class SecureBulkLoadManager {
public void cleanupBulkLoad(final HRegion region, final CleanupBulkLoadRequest request)
throws IOException {
- try {
- region.getCoprocessorHost().preCleanupBulkLoad(getActiveUser());
+ region.getCoprocessorHost().preCleanupBulkLoad(getActiveUser());
- Path path = new Path(request.getBulkToken());
- if (!fs.delete(path, true)) {
- if (fs.exists(path)) {
- throw new IOException("Failed to clean up " + path);
- }
- }
- LOG.info("Cleaned up " + path + " successfully.");
- } finally {
- UserGroupInformation ugi = getActiveUser().getUGI();
- try {
- if (!UserGroupInformation.getLoginUser().equals(ugi) && !isUserReferenced(ugi)) {
- FileSystem.closeAllForUGI(ugi);
- }
- } catch (IOException e) {
- LOG.error("Failed to close FileSystem for: " + ugi, e);
+ Path path = new Path(request.getBulkToken());
+ if (!fs.delete(path, true)) {
+ if (fs.exists(path)) {
+ throw new IOException("Failed to clean up " + path);
}
}
+ LOG.trace("Cleaned up {} successfully.", path);
}
private Consumer<HRegion> fsCreatedListener;
@@ -280,6 +269,13 @@ public class SecureBulkLoadManager {
public Map<byte[], List<Path>> run() {
FileSystem fs = null;
try {
+ /*
+ * This is creating and caching a new FileSystem instance. Other code called
+ * "beneath" this method will rely on this FileSystem instance being in the
+ * cache. This is important as those methods make _no_ attempt to close this
+ * FileSystem instance. It is critical that here, in SecureBulkLoadManager,
+ * we are tracking the lifecycle and closing the FS when safe to do so.
+ */
fs = FileSystem.get(conf);
for(Pair<byte[], String> el: familyPaths) {
Path stageFamily = new Path(bulkToken, Bytes.toString(el.getFirst()));
@@ -304,6 +300,13 @@ public class SecureBulkLoadManager {
});
} finally {
decrementUgiReference(ugi);
+ try {
+ if (!UserGroupInformation.getLoginUser().equals(ugi) && !isUserReferenced(ugi)) {
+ FileSystem.closeAllForUGI(ugi);
+ }
+ } catch (IOException e) {
+ LOG.error("Failed to close FileSystem for: {}", ugi, e);
+ }
if (region.getCoprocessorHost() != null) {
region.getCoprocessorHost().postBulkLoadHFile(familyPaths, map);
}