You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by yi...@apache.org on 2023/06/29 01:06:06 UTC

[doris] branch master updated: [fix](catalog) disable FileSystem Cache to avoid too many fs cache (#21283)

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9af714bceb [fix](catalog) disable FileSystem Cache to avoid too many fs cache (#21283)
9af714bceb is described below

commit 9af714bcebe1b5e6c6904f9e4232cfefa9f05689
Author: Mingyu Chen <mo...@163.com>
AuthorDate: Thu Jun 29 09:06:00 2023 +0800

    [fix](catalog) disable FileSystem Cache to avoid too many fs cache (#21283)
    
    When a new hive catalog is created or an existing hive catalog is refreshed, the HiveMetaStore cache is refreshed,
    and that refresh calls "FileInputFormat.setInputPaths()".
    This method creates a new FileSystem instance and stores it in FileSystem's cache.
    So if a catalog is refreshed frequently, too many FileSystem instances accumulate in the cache, causing OOM.
    
    This PR disables the FileSystem cache by setting "fs.<scheme>.impl.disable.cache" to "true" in the job conf.
---
 .../doris/datasource/hive/HiveMetaStoreCache.java     | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 5777cf49a0..d64cdf477b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -78,6 +78,7 @@ import org.apache.logging.log4j.Logger;
 
 import java.io.FileNotFoundException;
 import java.io.UnsupportedEncodingException;
+import java.net.URI;
 import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
@@ -328,6 +329,17 @@ public class HiveMetaStoreCache {
         try {
             Thread.currentThread().setContextClassLoader(ClassLoader.getSystemClassLoader());
             String finalLocation = S3Util.convertToS3IfNecessary(key.location);
+            // disable the fs cache in FileSystem, or it will always from new FileSystem
+            // and save it in cache when calling FileInputFormat.setInputPaths().
+            try {
+                Path path = new Path(finalLocation);
+                URI uri = path.toUri();
+                if (uri.getScheme() != null) {
+                    updateJobConf("fs." + uri.getScheme() + ".impl.disable.cache", "true");
+                }
+            } catch (Exception e) {
+                LOG.warn("unknown scheme in path: " + finalLocation, e);
+            }
             FileInputFormat.setInputPaths(jobConf, finalLocation);
             try {
                 FileCacheValue result;
@@ -381,6 +393,13 @@ public class HiveMetaStoreCache {
         // Otherwise, getSplits() may throw exception: "Not a file xxx"
         // https://blog.actorsfit.com/a?ID=00550-ce56ec63-1bff-4b0c-a6f7-447b93efaa31
         jobConf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true");
+        // disable FileSystem's cache
+        jobConf.set("fs.hdfs.impl.disable.cache", "true");
+        jobConf.set("fs.file.impl.disable.cache", "true");
+    }
+
+    private synchronized void updateJobConf(String key, String value) {
+        jobConf.set(key, value);
     }
 
     public HivePartitionValues getPartitionValues(String dbName, String tblName, List<Type> types) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org