You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by "spaces-X (via GitHub)" <gi...@apache.org> on 2023/06/08 06:52:14 UTC

[GitHub] [doris] spaces-X commented on a diff in pull request #20327: [WIP](load) new build segment tool

spaces-X commented on code in PR #20327:
URL: https://github.com/apache/doris/pull/20327#discussion_r1222530731


##########
be/src/tools/build_segment_tool/build_helper.cpp:
##########
@@ -0,0 +1,196 @@
+#include "tools/build_segment_tool/build_helper.h"
+
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <sstream>
+#include <string>
+
+#include "common/status.h"
+#include "common/config.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet.h"
+#include "olap/tablet_meta.h"
+#include "olap/tablet_manager.h"
+#include "olap/tablet_schema_cache.h"
+#include "olap/file_header.h"
+#include "runtime/exec_env.h"
+#include "tools/build_segment_tool/builder_scanner_memtable.h"
+#include "util/disk_info.h"
+#include "util/mem_info.h"
+
+namespace doris {
+
+BuildHelper* BuildHelper::_s_instance = nullptr;
+
+BuildHelper* BuildHelper::init_instance() {
+    // DCHECK(_s_instance == nullptr);
+    static BuildHelper instance;
+    _s_instance = &instance;
+    return _s_instance;
+}
+
+void BuildHelper::initial_build_env() {
+    char doris_home[] = "DORIS_HOME=/tmp";
+    putenv(doris_home);
+
+    if (!doris::config::init(nullptr, true, false, true)) {
+        LOG(FATAL) << "init config fail";
+        exit(-1);
+    }
+    CpuInfo::init();
+    DiskInfo::init();
+    MemInfo::init();
+    // write buffer size before flush
+    config::write_buffer_size = 209715200;
+    // max buffer size used in memtable for the aggregated table
+    config::write_buffer_size_for_agg = 8194304000;
+    // CONF_mInt64(memtable_max_buffer_size, "8194304000");
+
+    // std::shared_ptr<doris::MemTrackerLimiter> process_mem_tracker =
+    //         std::make_shared<doris::MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "Process");
+    // doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(process_mem_tracker);
+    // doris::thread_context()->thread_mem_tracker_mgr->attach_limiter_tracker(process_mem_tracker,
+    //                                                                         TUniqueId());
+
+    // doris::thread_context()->thread_mem_tracker_mgr->init();
+    // doris::thread_context()->thread_mem_tracker_mgr->set_check_limit(false);
+    doris::TabletSchemaCache::create_global_schema_cache();
+    doris::ChunkAllocator::init_instance(4096);
+
+}
+
+void BuildHelper::open(const std::string& meta_file, const std::string& build_dir,
+                         const std::string& data_path, const std::string& file_type) {
+    _meta_file = meta_file;
+    _build_dir = build_dir;
+    if (data_path.at(data_path.size() - 1) != '/') {
+        _data_path = data_path + "/";
+    } else {
+        _data_path = data_path;
+    }
+
+    _file_type = file_type;
+
+    std::filesystem::path dir_path(std::filesystem::absolute(std::filesystem::path(build_dir)));
+    if (!std::filesystem::is_directory(std::filesystem::status(dir_path))) {
+        LOG(FATAL) << "build dir should be a directory";
+    }
+
+    // init and open storage engine
+    std::vector<doris::StorePath> paths;
+    auto olap_res = doris::parse_conf_store_paths(_build_dir, &paths);
+    if (!olap_res) {
+        LOG(FATAL) << "parse config storage path failed, path=" << doris::config::storage_root_path;
+        exit(-1);
+    }
+    doris::ExecEnv::init(doris::ExecEnv::GetInstance(), paths);
+
+    doris::EngineOptions options;
+    options.store_paths = paths;
+    options.backend_uid = doris::UniqueId::gen_uid();
+    doris::StorageEngine* engine = nullptr;
+    auto st = doris::StorageEngine::open(options, &engine);
+    if (!st.ok()) {
+        LOG(FATAL) << "fail to open StorageEngine, res=" << st;
+        exit(-1);
+    }
+}
+
+std::string BuildHelper::read_local_file(const std::string& file) {

Review Comment:
   Is this function useless?



##########
be/src/tools/build_segment_tool/build_helper.cpp:
##########
@@ -0,0 +1,196 @@
+#include "tools/build_segment_tool/build_helper.h"
+
+#include <cstdlib>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <sstream>
+#include <string>
+
+#include "common/status.h"
+#include "common/config.h"
+#include "olap/storage_engine.h"
+#include "olap/tablet.h"
+#include "olap/tablet_meta.h"
+#include "olap/tablet_manager.h"
+#include "olap/tablet_schema_cache.h"
+#include "olap/file_header.h"
+#include "runtime/exec_env.h"
+#include "tools/build_segment_tool/builder_scanner_memtable.h"
+#include "util/disk_info.h"
+#include "util/mem_info.h"
+
+namespace doris {
+
+BuildHelper* BuildHelper::_s_instance = nullptr;
+
+BuildHelper* BuildHelper::init_instance() {
+    // DCHECK(_s_instance == nullptr);
+    static BuildHelper instance;
+    _s_instance = &instance;
+    return _s_instance;
+}
+
+void BuildHelper::initial_build_env() {
+    char doris_home[] = "DORIS_HOME=/tmp";
+    putenv(doris_home);
+
+    if (!doris::config::init(nullptr, true, false, true)) {
+        LOG(FATAL) << "init config fail";
+        exit(-1);
+    }
+    CpuInfo::init();
+    DiskInfo::init();
+    MemInfo::init();
+    // write buffer size before flush
+    config::write_buffer_size = 209715200;
+    // max buffer size used in memtable for the aggregated table
+    config::write_buffer_size_for_agg = 8194304000;
+    // CONF_mInt64(memtable_max_buffer_size, "8194304000");
+
+    // std::shared_ptr<doris::MemTrackerLimiter> process_mem_tracker =
+    //         std::make_shared<doris::MemTrackerLimiter>(MemTrackerLimiter::Type::GLOBAL, "Process");
+    // doris::ExecEnv::GetInstance()->set_orphan_mem_tracker(process_mem_tracker);
+    // doris::thread_context()->thread_mem_tracker_mgr->attach_limiter_tracker(process_mem_tracker,
+    //                                                                         TUniqueId());
+
+    // doris::thread_context()->thread_mem_tracker_mgr->init();
+    // doris::thread_context()->thread_mem_tracker_mgr->set_check_limit(false);
+    doris::TabletSchemaCache::create_global_schema_cache();
+    doris::ChunkAllocator::init_instance(4096);
+
+}
+
+void BuildHelper::open(const std::string& meta_file, const std::string& build_dir,
+                         const std::string& data_path, const std::string& file_type) {
+    _meta_file = meta_file;
+    _build_dir = build_dir;
+    if (data_path.at(data_path.size() - 1) != '/') {
+        _data_path = data_path + "/";
+    } else {
+        _data_path = data_path;
+    }
+
+    _file_type = file_type;
+
+    std::filesystem::path dir_path(std::filesystem::absolute(std::filesystem::path(build_dir)));
+    if (!std::filesystem::is_directory(std::filesystem::status(dir_path))) {
+        LOG(FATAL) << "build dir should be a directory";
+    }
+
+    // init and open storage engine
+    std::vector<doris::StorePath> paths;
+    auto olap_res = doris::parse_conf_store_paths(_build_dir, &paths);
+    if (!olap_res) {
+        LOG(FATAL) << "parse config storage path failed, path=" << doris::config::storage_root_path;
+        exit(-1);
+    }
+    doris::ExecEnv::init(doris::ExecEnv::GetInstance(), paths);
+
+    doris::EngineOptions options;
+    options.store_paths = paths;
+    options.backend_uid = doris::UniqueId::gen_uid();
+    doris::StorageEngine* engine = nullptr;
+    auto st = doris::StorageEngine::open(options, &engine);
+    if (!st.ok()) {
+        LOG(FATAL) << "fail to open StorageEngine, res=" << st;
+        exit(-1);
+    }
+}
+
+std::string BuildHelper::read_local_file(const std::string& file) {
+    std::filesystem::path path(std::filesystem::absolute(std::filesystem::path(file)));
+    if (!std::filesystem::exists(path)) { LOG(FATAL) << "file not exist:" << file;
+    }
+
+    std::ifstream f(path, std::ios::in | std::ios::binary);
+    const auto sz = std::filesystem::file_size(path);
+    std::string result(sz, '\0');

Review Comment:
   This will cause an out-of-memory failure when the file is too large, because a buffer the size of the entire file is allocated at once.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org