You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "morningman (via GitHub)" <gi...@apache.org> on 2023/04/16 15:04:42 UTC

[GitHub] [doris] morningman commented on a diff in pull request #18652: [improve] Refactor file cache and Improve the file cache strategy

morningman commented on code in PR #18652:
URL: https://github.com/apache/doris/pull/18652#discussion_r1167931288


##########
be/src/common/config.h:
##########
@@ -898,9 +898,8 @@ CONF_Bool(enable_index_apply_preds_except_leafnode_of_andnode, "true");
 
 // block file cache
 CONF_Bool(enable_file_cache, "false");
-// format: [{"path":"/path/to/file_cache","normal":21474836480,"persistent":10737418240,"query_limit":10737418240}]
+// format: [{"path":"/mnt/disk3/selectdb_cloud/file_cache","total_size":21474836480,"query_limit":10737418240}]

Review Comment:
   better use a common name for example.
   And please also give example for multi cache path



##########
be/src/io/cache/block/block_file_cache_settings.h:
##########
@@ -25,15 +25,19 @@
 namespace doris {
 namespace io {
 
+// [query:disposable:index:total]
+constexpr std::array<size_t, 4> percentage {17, 2, 1, 20};
+static_assert(percentage[0] + percentage[1] + percentage[2] == percentage[3]);
 struct FileCacheSettings {
-    size_t max_size = 0;
-    size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS;
-    // use a priority policy to eliminate
-    size_t persistent_max_size = 0;
-    size_t persistent_max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_ELEMENTS;
-
-    size_t max_file_segment_size = 0;
-    size_t max_query_cache_size = 0;
+    size_t total_size {0};

Review Comment:
   Better add comment to explain `query`, `disposable` and `index`



##########
be/src/common/config.h:
##########
@@ -898,9 +898,8 @@ CONF_Bool(enable_index_apply_preds_except_leafnode_of_andnode, "true");
 
 // block file cache
 CONF_Bool(enable_file_cache, "false");
-// format: [{"path":"/path/to/file_cache","normal":21474836480,"persistent":10737418240,"query_limit":10737418240}]
+// format: [{"path":"/mnt/disk3/selectdb_cloud/file_cache","total_size":21474836480,"query_limit":10737418240}]

Review Comment:
   And need to update the doc: https://doris.apache.org/zh-CN/docs/dev/lakehouse/filecache



##########
be/src/olap/options.cpp:
##########
@@ -174,45 +173,59 @@ Status parse_conf_cache_paths(const std::string& config_path, std::vector<CacheP
         auto map = config.GetObject();
         DCHECK(map.HasMember(CACHE_PATH.c_str()));
         std::string path = map.FindMember(CACHE_PATH.c_str())->value.GetString();
-        int64_t normal_size = map.HasMember(CACHE_NORMAL_SIZE.c_str())
-                                      ? map.FindMember(CACHE_NORMAL_SIZE.c_str())->value.GetInt64()
-                                      : 0;
-        int64_t persistent_size =
-                map.HasMember(CACHE_PERSISTENT_SIZE.c_str())
-                        ? map.FindMember(CACHE_PERSISTENT_SIZE.c_str())->value.GetInt64()
-                        : 0;
-        int64_t query_limit_bytes = 0;
+        int64_t total_size = 0, query_limit_bytes = 0;
+        if (map.HasMember(CACHE_TOTAL_SIZE.c_str())) {
+            auto& value = map.FindMember(CACHE_TOTAL_SIZE.c_str())->value;
+            if (value.IsInt64()) {
+                total_size = value.GetInt64();
+            } else {
+                return Status::InvalidArgument("normal should be int64");

Review Comment:
   ```suggestion
                   return Status::InvalidArgument("normal cache size should be int64");
   ```



##########
be/src/olap/options.cpp:
##########
@@ -174,45 +173,59 @@ Status parse_conf_cache_paths(const std::string& config_path, std::vector<CacheP
         auto map = config.GetObject();
         DCHECK(map.HasMember(CACHE_PATH.c_str()));
         std::string path = map.FindMember(CACHE_PATH.c_str())->value.GetString();
-        int64_t normal_size = map.HasMember(CACHE_NORMAL_SIZE.c_str())
-                                      ? map.FindMember(CACHE_NORMAL_SIZE.c_str())->value.GetInt64()
-                                      : 0;
-        int64_t persistent_size =
-                map.HasMember(CACHE_PERSISTENT_SIZE.c_str())
-                        ? map.FindMember(CACHE_PERSISTENT_SIZE.c_str())->value.GetInt64()
-                        : 0;
-        int64_t query_limit_bytes = 0;
+        int64_t total_size = 0, query_limit_bytes = 0;
+        if (map.HasMember(CACHE_TOTAL_SIZE.c_str())) {
+            auto& value = map.FindMember(CACHE_TOTAL_SIZE.c_str())->value;
+            if (value.IsInt64()) {
+                total_size = value.GetInt64();
+            } else {
+                return Status::InvalidArgument("normal should be int64");
+            }
+        }
         if (config::enable_file_cache_query_limit) {
-            query_limit_bytes =
-                    map.HasMember(CACHE_QUERY_LIMIT_SIZE.c_str())
-                            ? map.FindMember(CACHE_QUERY_LIMIT_SIZE.c_str())->value.GetInt64()
-                            : normal_size / 5;
+            if (map.HasMember(CACHE_QUERY_LIMIT_SIZE.c_str())) {
+                auto& value = map.FindMember(CACHE_QUERY_LIMIT_SIZE.c_str())->value;
+                if (value.IsInt64()) {
+                    query_limit_bytes = value.GetInt64();
+                } else {
+                    return Status::InvalidArgument("query_limit should be int64");
+                }
+            }
         }
-        if (normal_size <= 0 || persistent_size <= 0) {
-            LOG(WARNING) << "normal or persistent size should not less than or equal to zero";
-            return Status::InternalError("OLAP_ERR_INPUT_PARAMETER_ERROR");
+        if (total_size <= 0 || (config::enable_file_cache_query_limit && query_limit_bytes <= 0)) {
+            return Status::InvalidArgument(
+                    "total_size or query_limit should not less than or equal to zero");
         }
-        paths.emplace_back(std::move(path), normal_size, persistent_size, query_limit_bytes);
+        paths.emplace_back(std::move(path), total_size, query_limit_bytes);
     }
     if (paths.empty()) {
-        LOG(WARNING) << "fail to parse storage_root_path config. value=[" << config_path << "]";
-        return Status::InternalError("OLAP_ERR_INPUT_PARAMETER_ERROR");
+        return Status::InvalidArgument("fail to parse storage_root_path config. value={}",
+                                       config_path);
     }
     return Status::OK();
 }
 
 io::FileCacheSettings CachePath::init_settings() const {
     io::FileCacheSettings settings;
-    settings.max_size = normal_bytes;
-    settings.persistent_max_size = persistent_bytes;
+    settings.total_size = total_bytes;
     settings.max_file_segment_size = config::file_cache_max_file_segment_size;
-
-    settings.max_elements = std::max<size_t>(
-            normal_bytes / config::file_cache_max_file_segment_size, settings.max_elements);
-    settings.persistent_max_elements =
-            std::max<size_t>(persistent_bytes / config::file_cache_max_file_segment_size,
-                             settings.persistent_max_elements);
     settings.max_query_cache_size = query_limit_bytes;
+    size_t per_size = settings.total_size / io::percentage[3];
+    settings.disposable_queue_size = per_size * io::percentage[1];
+    settings.disposable_queue_elements =

Review Comment:
   better to config the `queue_elements`, not calculate it.
   Because the `max_file_segment_size` may be vary in different cases.
   PTAL @AshinGau 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org