You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by ma...@apache.org on 2022/06/27 15:11:07 UTC
[nifi-minifi-cpp] 02/03: MINIFICPP-1870 Replace IgnoreCaptureGroupZero with IncludeCaptureGroupZero
This is an automated email from the ASF dual-hosted git repository.
martinzink pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git
commit f1e1b486663d4f6de03dc8aa79670b8da17f7eb3
Author: Gabor Gyimesi <ga...@gmail.com>
AuthorDate: Mon Jun 27 16:18:21 2022 +0200
MINIFICPP-1870 Replace IgnoreCaptureGroupZero with IncludeCaptureGroupZero
Closes #1359
Signed-off-by: Martin Zink <ma...@apache.org>
---
.../standard-processors/processors/ExtractText.cpp | 22 +++----------
.../standard-processors/processors/ExtractText.h | 21 +++---------
.../tests/unit/ExtractTextTests.cpp | 38 +++++++++++++++-------
3 files changed, 37 insertions(+), 44 deletions(-)
diff --git a/extensions/standard-processors/processors/ExtractText.cpp b/extensions/standard-processors/processors/ExtractText.cpp
index 9a0d86661..dc7be63e8 100644
--- a/extensions/standard-processors/processors/ExtractText.cpp
+++ b/extensions/standard-processors/processors/ExtractText.cpp
@@ -22,7 +22,6 @@
#include <string>
#include <memory>
#include <map>
-#include <iostream>
#include <sstream>
#include <utility>
@@ -35,11 +34,7 @@
#include "utils/gsl.h"
#include "utils/RegexUtils.h"
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace processors {
+namespace org::apache::nifi::minifi::processors {
constexpr size_t MAX_BUFFER_SIZE = 4096;
constexpr int MAX_CAPTURE_GROUP_SIZE = 1024;
@@ -57,7 +52,7 @@ core::Property ExtractText::RegexMode(
->withDescription("Set this to extract parts of flowfile content using regular experssions in dynamic properties")
->withDefaultValue<bool>(false)->build());
-core::Property ExtractText::IgnoreCaptureGroupZero(
+core::Property ExtractText::IncludeCaptureGroupZero(
core::PropertyBuilder::createProperty("Include Capture Group 0")
->withDescription("Indicates that Capture Group 0 should be included as an attribute. "
"Capture Group 0 represents the entirety of the regular expression match, is typically not used, and could have considerable length.")
@@ -143,8 +138,7 @@ int64_t ExtractText::ReadCallback::operator()(const std::shared_ptr<io::BaseStre
regex_flags.push_back(utils::Regex::Mode::ICASE);
}
- bool ignoregroupzero;
- ctx_->getProperty(IgnoreCaptureGroupZero.getName(), ignoregroupzero);
+ const bool include_capture_group_zero = ctx_->getProperty<bool>(IncludeCaptureGroupZero).value_or(true);
bool repeatingcapture;
ctx_->getProperty(EnableRepeatingCaptureGroup.getName(), repeatingcapture);
@@ -171,9 +165,7 @@ int64_t ExtractText::ReadCallback::operator()(const std::shared_ptr<io::BaseStre
utils::Regex rgx(value, regex_flags);
utils::SMatch matches;
while (utils::regexSearch(workStr, matches, rgx)) {
- size_t i = ignoregroupzero ? 1 : 0;
-
- for (; i < matches.size(); ++i, ++matchcount) {
+ for (std::size_t i = (include_capture_group_zero ? 0 : 1); i < matches.size(); ++i, ++matchcount) {
std::string attributeValue = matches[i];
if (attributeValue.length() > maxCaptureSize) {
attributeValue = attributeValue.substr(0, maxCaptureSize);
@@ -212,8 +204,4 @@ ExtractText::ReadCallback::ReadCallback(std::shared_ptr<core::FlowFile> flowFile
REGISTER_RESOURCE(ExtractText, Processor);
-} // namespace processors
-} // namespace minifi
-} // namespace nifi
-} // namespace apache
-} // namespace org
+} // namespace org::apache::nifi::minifi::processors
diff --git a/extensions/standard-processors/processors/ExtractText.h b/extensions/standard-processors/processors/ExtractText.h
index 49abbd56c..0d06a3c53 100644
--- a/extensions/standard-processors/processors/ExtractText.h
+++ b/extensions/standard-processors/processors/ExtractText.h
@@ -17,8 +17,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#ifndef EXTENSIONS_STANDARD_PROCESSORS_PROCESSORS_EXTRACTTEXT_H_
-#define EXTENSIONS_STANDARD_PROCESSORS_PROCESSORS_EXTRACTTEXT_H_
+#pragma once
#include <memory>
#include <string>
@@ -29,11 +28,7 @@
#include "FlowFileRecord.h"
#include "utils/Export.h"
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace processors {
+namespace org::apache::nifi::minifi::processors {
class ExtractText : public core::Processor {
public:
@@ -46,7 +41,7 @@ class ExtractText : public core::Processor {
EXTENSIONAPI static core::Property Attribute;
EXTENSIONAPI static core::Property SizeLimit;
EXTENSIONAPI static core::Property RegexMode;
- EXTENSIONAPI static core::Property IgnoreCaptureGroupZero;
+ EXTENSIONAPI static core::Property IncludeCaptureGroupZero;
EXTENSIONAPI static core::Property InsensitiveMatch;
EXTENSIONAPI static core::Property MaxCaptureGroupLen;
EXTENSIONAPI static core::Property EnableRepeatingCaptureGroup;
@@ -55,7 +50,7 @@ class ExtractText : public core::Processor {
Attribute,
SizeLimit,
RegexMode,
- IgnoreCaptureGroupZero,
+ IncludeCaptureGroupZero,
InsensitiveMatch,
MaxCaptureGroupLen,
EnableRepeatingCaptureGroup
@@ -92,10 +87,4 @@ class ExtractText : public core::Processor {
std::shared_ptr<core::logging::Logger> logger_ = core::logging::LoggerFactory<ExtractText>::getLogger();
};
-} // namespace processors
-} // namespace minifi
-} // namespace nifi
-} // namespace apache
-} // namespace org
-
-#endif // EXTENSIONS_STANDARD_PROCESSORS_PROCESSORS_EXTRACTTEXT_H_
+} // namespace org::apache::nifi::minifi::processors
diff --git a/extensions/standard-processors/tests/unit/ExtractTextTests.cpp b/extensions/standard-processors/tests/unit/ExtractTextTests.cpp
index f41f74e7a..e4796a661 100644
--- a/extensions/standard-processors/tests/unit/ExtractTextTests.cpp
+++ b/extensions/standard-processors/tests/unit/ExtractTextTests.cpp
@@ -17,7 +17,6 @@
*/
#include <list>
#include <fstream>
-#include <map>
#include <memory>
#include <utility>
#include <string>
@@ -144,7 +143,6 @@ TEST_CASE("Test usage of ExtractText in regex mode", "[extracttextRegexTest]") {
std::shared_ptr<core::Processor> maprocessor = plan->addProcessor("ExtractText", "testExtractText", core::Relationship("success", "description"), true);
plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::RegexMode.getName(), "true");
- plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::IgnoreCaptureGroupZero.getName(), "true");
plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::EnableRepeatingCaptureGroup.getName(), "true");
plan->setProperty(maprocessor, "RegexAttr", "Speed limit ([0-9]+)", true);
plan->setProperty(maprocessor, "InvalidRegex", "[Invalid)A(F)", true);
@@ -162,17 +160,34 @@ TEST_CASE("Test usage of ExtractText in regex mode", "[extracttextRegexTest]") {
test_file.close();
}
- plan->runNextProcessor(); // GetFile
- plan->runNextProcessor(); // ExtractText
- plan->runNextProcessor(); // LogAttribute
+ std::list<std::string> expected_logs;
+
+ SECTION("Do not include capture group 0") {
+ plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::IncludeCaptureGroupZero.getName(), "false");
+
+ testController.runSession(plan);
- std::list<std::string> suffixes = { "", ".0", ".1" };
+ expected_logs = {
+ "key:RegexAttr value:130",
+ "key:RegexAttr.0 value:130",
+ "key:RegexAttr.1 value:80"
+ };
+ }
+
+ SECTION("Include capture group 0") {
+ testController.runSession(plan);
+
+ expected_logs = {
+ "key:RegexAttr value:Speed limit 130",
+ "key:RegexAttr.0 value:Speed limit 130",
+ "key:RegexAttr.1 value:130",
+ "key:RegexAttr.2 value:Speed limit 80",
+ "key:RegexAttr.3 value:80"
+ };
+ }
- for (const auto& suffix : suffixes) {
- ss.str("");
- ss << "key:" << "RegexAttr" << suffix << " value:" << ((suffix == ".1") ? "80" : "130");
- std::string log_check = ss.str();
- REQUIRE(LogTestController::getInstance().contains(log_check));
+ for (const auto& log : expected_logs) {
+ REQUIRE(LogTestController::getInstance().contains(log));
}
std::string error_str = "error encountered when trying to construct regular expression from property (key: InvalidRegex)";
@@ -199,6 +214,7 @@ TEST_CASE("Test usage of ExtractText in regex mode with large regex matches", "[
auto extract_text_processor = plan->addProcessor("ExtractText", "ExtractText", core::Relationship("success", "description"), true);
plan->setProperty(extract_text_processor, org::apache::nifi::minifi::processors::ExtractText::RegexMode.getName(), "true");
+ plan->setProperty(extract_text_processor, org::apache::nifi::minifi::processors::ExtractText::IncludeCaptureGroupZero.getName(), "false");
plan->setProperty(extract_text_processor, "RegexAttr", "Speed limit (.*)", true);
auto log_attribute_processor = plan->addProcessor("LogAttribute", "outputLogAttribute", core::Relationship("success", "description"), true);