You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by ma...@apache.org on 2022/06/27 15:11:07 UTC

[nifi-minifi-cpp] 02/03: MINIFICPP-1870 Replace IgnoreCaptureGroupZero with IncludeCaptureGroupZero

This is an automated email from the ASF dual-hosted git repository.

martinzink pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit f1e1b486663d4f6de03dc8aa79670b8da17f7eb3
Author: Gabor Gyimesi <ga...@gmail.com>
AuthorDate: Mon Jun 27 16:18:21 2022 +0200

    MINIFICPP-1870 Replace IgnoreCaptureGroupZero with IncludeCaptureGroupZero
    
    Closes #1359
    
    Signed-off-by: Martin Zink <ma...@apache.org>
---
 .../standard-processors/processors/ExtractText.cpp | 22 +++----------
 .../standard-processors/processors/ExtractText.h   | 21 +++---------
 .../tests/unit/ExtractTextTests.cpp                | 38 +++++++++++++++-------
 3 files changed, 37 insertions(+), 44 deletions(-)

diff --git a/extensions/standard-processors/processors/ExtractText.cpp b/extensions/standard-processors/processors/ExtractText.cpp
index 9a0d86661..dc7be63e8 100644
--- a/extensions/standard-processors/processors/ExtractText.cpp
+++ b/extensions/standard-processors/processors/ExtractText.cpp
@@ -22,7 +22,6 @@
 #include <string>
 #include <memory>
 #include <map>
-#include <iostream>
 #include <sstream>
 #include <utility>
 
@@ -35,11 +34,7 @@
 #include "utils/gsl.h"
 #include "utils/RegexUtils.h"
 
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace processors {
+namespace org::apache::nifi::minifi::processors {
 
 constexpr size_t MAX_BUFFER_SIZE = 4096;
 constexpr int MAX_CAPTURE_GROUP_SIZE = 1024;
@@ -57,7 +52,7 @@ core::Property ExtractText::RegexMode(
     ->withDescription("Set this to extract parts of flowfile content using regular experssions in dynamic properties")
     ->withDefaultValue<bool>(false)->build());
 
-core::Property ExtractText::IgnoreCaptureGroupZero(
+core::Property ExtractText::IncludeCaptureGroupZero(
     core::PropertyBuilder::createProperty("Include Capture Group 0")
     ->withDescription("Indicates that Capture Group 0 should be included as an attribute. "
                       "Capture Group 0 represents the entirety of the regular expression match, is typically not used, and could have considerable length.")
@@ -143,8 +138,7 @@ int64_t ExtractText::ReadCallback::operator()(const std::shared_ptr<io::BaseStre
       regex_flags.push_back(utils::Regex::Mode::ICASE);
     }
 
-    bool ignoregroupzero;
-    ctx_->getProperty(IgnoreCaptureGroupZero.getName(), ignoregroupzero);
+    const bool include_capture_group_zero = ctx_->getProperty<bool>(IncludeCaptureGroupZero).value_or(true);
 
     bool repeatingcapture;
     ctx_->getProperty(EnableRepeatingCaptureGroup.getName(), repeatingcapture);
@@ -171,9 +165,7 @@ int64_t ExtractText::ReadCallback::operator()(const std::shared_ptr<io::BaseStre
         utils::Regex rgx(value, regex_flags);
         utils::SMatch matches;
         while (utils::regexSearch(workStr, matches, rgx)) {
-          size_t i = ignoregroupzero ? 1 : 0;
-
-          for (; i < matches.size(); ++i, ++matchcount) {
+          for (std::size_t i = (include_capture_group_zero ? 0 : 1); i < matches.size(); ++i, ++matchcount) {
             std::string attributeValue = matches[i];
             if (attributeValue.length() > maxCaptureSize) {
               attributeValue = attributeValue.substr(0, maxCaptureSize);
@@ -212,8 +204,4 @@ ExtractText::ReadCallback::ReadCallback(std::shared_ptr<core::FlowFile> flowFile
 
 REGISTER_RESOURCE(ExtractText, Processor);
 
-}  // namespace processors
-}  // namespace minifi
-}  // namespace nifi
-}  // namespace apache
-}  // namespace org
+}  // namespace org::apache::nifi::minifi::processors
diff --git a/extensions/standard-processors/processors/ExtractText.h b/extensions/standard-processors/processors/ExtractText.h
index 49abbd56c..0d06a3c53 100644
--- a/extensions/standard-processors/processors/ExtractText.h
+++ b/extensions/standard-processors/processors/ExtractText.h
@@ -17,8 +17,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef EXTENSIONS_STANDARD_PROCESSORS_PROCESSORS_EXTRACTTEXT_H_
-#define EXTENSIONS_STANDARD_PROCESSORS_PROCESSORS_EXTRACTTEXT_H_
+#pragma once
 
 #include <memory>
 #include <string>
@@ -29,11 +28,7 @@
 #include "FlowFileRecord.h"
 #include "utils/Export.h"
 
-namespace org {
-namespace apache {
-namespace nifi {
-namespace minifi {
-namespace processors {
+namespace org::apache::nifi::minifi::processors {
 
 class ExtractText : public core::Processor {
  public:
@@ -46,7 +41,7 @@ class ExtractText : public core::Processor {
   EXTENSIONAPI static core::Property Attribute;
   EXTENSIONAPI static core::Property SizeLimit;
   EXTENSIONAPI static core::Property RegexMode;
-  EXTENSIONAPI static core::Property IgnoreCaptureGroupZero;
+  EXTENSIONAPI static core::Property IncludeCaptureGroupZero;
   EXTENSIONAPI static core::Property InsensitiveMatch;
   EXTENSIONAPI static core::Property MaxCaptureGroupLen;
   EXTENSIONAPI static core::Property EnableRepeatingCaptureGroup;
@@ -55,7 +50,7 @@ class ExtractText : public core::Processor {
       Attribute,
       SizeLimit,
       RegexMode,
-      IgnoreCaptureGroupZero,
+      IncludeCaptureGroupZero,
       InsensitiveMatch,
       MaxCaptureGroupLen,
       EnableRepeatingCaptureGroup
@@ -92,10 +87,4 @@ class ExtractText : public core::Processor {
   std::shared_ptr<core::logging::Logger> logger_ = core::logging::LoggerFactory<ExtractText>::getLogger();
 };
 
-}  // namespace processors
-}  // namespace minifi
-}  // namespace nifi
-}  // namespace apache
-}  // namespace org
-
-#endif  // EXTENSIONS_STANDARD_PROCESSORS_PROCESSORS_EXTRACTTEXT_H_
+}  // namespace org::apache::nifi::minifi::processors
diff --git a/extensions/standard-processors/tests/unit/ExtractTextTests.cpp b/extensions/standard-processors/tests/unit/ExtractTextTests.cpp
index f41f74e7a..e4796a661 100644
--- a/extensions/standard-processors/tests/unit/ExtractTextTests.cpp
+++ b/extensions/standard-processors/tests/unit/ExtractTextTests.cpp
@@ -17,7 +17,6 @@
  */
 #include <list>
 #include <fstream>
-#include <map>
 #include <memory>
 #include <utility>
 #include <string>
@@ -144,7 +143,6 @@ TEST_CASE("Test usage of ExtractText in regex mode", "[extracttextRegexTest]") {
 
   std::shared_ptr<core::Processor> maprocessor = plan->addProcessor("ExtractText", "testExtractText", core::Relationship("success", "description"), true);
   plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::RegexMode.getName(), "true");
-  plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::IgnoreCaptureGroupZero.getName(), "true");
   plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::EnableRepeatingCaptureGroup.getName(), "true");
   plan->setProperty(maprocessor, "RegexAttr", "Speed limit ([0-9]+)", true);
   plan->setProperty(maprocessor, "InvalidRegex", "[Invalid)A(F)", true);
@@ -162,17 +160,34 @@ TEST_CASE("Test usage of ExtractText in regex mode", "[extracttextRegexTest]") {
     test_file.close();
   }
 
-  plan->runNextProcessor();  // GetFile
-  plan->runNextProcessor();  // ExtractText
-  plan->runNextProcessor();  // LogAttribute
+  std::list<std::string> expected_logs;
+
+  SECTION("Do not include capture group 0") {
+    plan->setProperty(maprocessor, org::apache::nifi::minifi::processors::ExtractText::IncludeCaptureGroupZero.getName(), "false");
+
+    testController.runSession(plan);
 
-  std::list<std::string> suffixes = { "", ".0", ".1" };
+    expected_logs = {
+      "key:RegexAttr value:130",
+      "key:RegexAttr.0 value:130",
+      "key:RegexAttr.1 value:80"
+    };
+  }
+
+  SECTION("Include capture group 0") {
+    testController.runSession(plan);
+
+    expected_logs = {
+      "key:RegexAttr value:Speed limit 130",
+      "key:RegexAttr.0 value:Speed limit 130",
+      "key:RegexAttr.1 value:130",
+      "key:RegexAttr.2 value:Speed limit 80",
+      "key:RegexAttr.3 value:80"
+    };
+  }
 
-  for (const auto& suffix : suffixes) {
-    ss.str("");
-    ss << "key:" << "RegexAttr" << suffix << " value:" << ((suffix == ".1") ? "80" : "130");
-    std::string log_check = ss.str();
-    REQUIRE(LogTestController::getInstance().contains(log_check));
+  for (const auto& log : expected_logs) {
+    REQUIRE(LogTestController::getInstance().contains(log));
   }
 
   std::string error_str = "error encountered when trying to construct regular expression from property (key: InvalidRegex)";
@@ -199,6 +214,7 @@ TEST_CASE("Test usage of ExtractText in regex mode with large regex matches", "[
 
   auto extract_text_processor = plan->addProcessor("ExtractText", "ExtractText", core::Relationship("success", "description"), true);
   plan->setProperty(extract_text_processor, org::apache::nifi::minifi::processors::ExtractText::RegexMode.getName(), "true");
+  plan->setProperty(extract_text_processor, org::apache::nifi::minifi::processors::ExtractText::IncludeCaptureGroupZero.getName(), "false");
   plan->setProperty(extract_text_processor, "RegexAttr", "Speed limit (.*)", true);
 
   auto log_attribute_processor = plan->addProcessor("LogAttribute", "outputLogAttribute", core::Relationship("success", "description"), true);