You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by sz...@apache.org on 2022/05/13 14:37:18 UTC
[nifi-minifi-cpp] 01/03: MINIFICPP-1806 Use boyer_moore for extension verification
This is an automated email from the ASF dual-hosted git repository.
szaszm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit e41fc9e09033639ceab1a5d1992dbb3fc71a1ad7
Author: Adam Debreceni <ad...@apache.org>
AuthorDate: Fri May 13 16:30:41 2022 +0200
MINIFICPP-1806 Use boyer_moore for extension verification
In a debug build, the extension verification for libcore-minifi.so took
~1400 ms (which, together with other dynamic libraries, adds seconds to
the startup time during development); with this change it takes ~100 ms.
Alternatives considered:
- boyer_moore + custom span concat: ~150 ms
- boyer_moore + ranges::concat: ~1700 ms
- boyer_moore + mmap: ~100 ms (the mmap wrapper would add significant
complexity)
Closes #1310
Signed-off-by: Marton Szasz <sz...@apache.org>
---
libminifi/src/utils/file/FileUtils.cpp | 45 ++++++++++++----------------------
1 file changed, 15 insertions(+), 30 deletions(-)
diff --git a/libminifi/src/utils/file/FileUtils.cpp b/libminifi/src/utils/file/FileUtils.cpp
index 33602a9fe..d141da61c 100644
--- a/libminifi/src/utils/file/FileUtils.cpp
+++ b/libminifi/src/utils/file/FileUtils.cpp
@@ -22,6 +22,9 @@
#include <algorithm>
#include <iostream>
+#include "utils/Literals.h"
+#include "utils/Searcher.h"
+
namespace org {
namespace apache {
namespace nifi {
@@ -49,41 +52,23 @@ uint64_t computeChecksum(const std::string &file_name, uint64_t up_to_position)
}
bool contains(const std::filesystem::path& file_path, std::string_view text_to_search) {
- gsl_Expects(text_to_search.size() <= 8192);
+ gsl_Expects(text_to_search.size() <= 8_KiB);
gsl_ExpectsAudit(std::filesystem::exists(file_path));
- std::array<char, 8192> buf1{};
- std::array<char, 8192> buf2{};
- gsl::span<char> left = buf1;
- gsl::span<char> right = buf2;
-
- const auto charat = [&](size_t idx) {
- if (idx < left.size()) {
- return left[idx];
- } else if (idx < left.size() + right.size()) {
- return right[idx - left.size()];
- } else {
- return '\0';
- }
- };
- const auto check_range = [&](size_t start, size_t end) -> size_t {
- for (size_t i = start; i < end; ++i) {
- size_t j{};
- for (j = 0; j < text_to_search.size(); ++j) {
- if (charat(i + j) != text_to_search[j]) break;
- }
- if (j == text_to_search.size()) return true;
- }
- return false;
- };
+ std::array<char, 16_KiB> buf{};
+ gsl::span<char> view;
+
+ Searcher searcher(text_to_search.begin(), text_to_search.end());
std::ifstream ifs{file_path, std::ios::binary};
- ifs.read(right.data(), gsl::narrow<std::streamsize>(right.size()));
do {
- std::swap(left, right);
- ifs.read(right.data(), gsl::narrow<std::streamsize>(right.size()));
- if (check_range(0, left.size())) return true;
+ std::copy(buf.end() - text_to_search.size(), buf.end(), buf.begin());
+ ifs.read(buf.data() + text_to_search.size(), buf.size() - text_to_search.size());
+ view = gsl::span<char>(buf.data(), text_to_search.size() + gsl::narrow<size_t>(ifs.gcount()));
+ if (std::search(view.begin(), view.end(), searcher) != view.end()) {
+ return true;
+ }
} while (ifs);
- return check_range(left.size(), left.size() + right.size());
+ return std::search(view.begin(), view.end(), searcher) != view.end();
}
time_t to_time_t(std::filesystem::file_time_type file_time) {