You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by sz...@apache.org on 2022/05/11 15:21:00 UTC

[nifi-minifi-cpp] 01/04: MINIFICPP-1687 Signal error on UUID collision

This is an automated email from the ASF dual-hosted git repository.

szaszm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit 6e573f7373f5b36b06b6317978a8b691702257bf
Author: Gabor Gyimesi <ga...@gmail.com>
AuthorDate: Wed May 11 15:28:12 2022 +0200

    MINIFICPP-1687 Signal error on UUID collision
    
    Closes #1316
    Signed-off-by: Marton Szasz <sz...@apache.org>
---
 .../tests/unit/YamlConfigurationTests.cpp          | 64 ++++++++++++++++++++
 libminifi/include/core/yaml/CheckRequiredField.h   |  4 +-
 libminifi/include/core/yaml/YamlConfiguration.h    |  4 ++
 libminifi/src/core/yaml/CheckRequiredField.cpp     | 12 ++--
 libminifi/src/core/yaml/YamlConfiguration.cpp      | 69 +++++++++++++---------
 libminifi/test/resources/TestHTTPSiteToSite.yml    |  8 +--
 6 files changed, 121 insertions(+), 40 deletions(-)

diff --git a/extensions/standard-processors/tests/unit/YamlConfigurationTests.cpp b/extensions/standard-processors/tests/unit/YamlConfigurationTests.cpp
index 62b88ce7a..0df17ee62 100644
--- a/extensions/standard-processors/tests/unit/YamlConfigurationTests.cpp
+++ b/extensions/standard-processors/tests/unit/YamlConfigurationTests.cpp
@@ -27,6 +27,7 @@
 #include "TestBase.h"
 #include "Catch.h"
 #include "utils/TestUtils.h"
+#include "utils/StringUtils.h"
 
 using namespace std::literals::chrono_literals;
 
@@ -867,3 +868,66 @@ Remote Process Groups: []
     REQUIRE(it.second->getSource());
   }
 }
+
+TEST_CASE("Test UUID duplication checks", "[YamlConfiguration]") {
+  TestController test_controller;
+  std::shared_ptr<core::Repository> test_prov_repo = core::createRepository("provenancerepository", true);
+  std::shared_ptr<core::Repository> test_flow_file_repo = core::createRepository("flowfilerepository", true);
+  std::shared_ptr<minifi::Configure> configuration = std::make_shared<minifi::Configure>();
+  std::shared_ptr<minifi::io::StreamFactory> stream_factory = minifi::io::StreamFactory::getInstance(configuration);
+  std::shared_ptr<core::ContentRepository> content_repo = std::make_shared<core::repository::VolatileContentRepository>();
+  core::YamlConfiguration yaml_config(test_prov_repo, test_flow_file_repo, content_repo, stream_factory, configuration);
+
+  for (char i = '1'; i <= '8'; ++i) {
+    DYNAMIC_SECTION("Changing UUID 00000000-0000-0000-0000-00000000000" << i << " to be a duplicate") {
+      std::string config_yaml =
+        R"(
+          Flow Controller:
+            name: root
+            comment: ''
+          Processors:
+          - id: 00000000-0000-0000-0000-000000000001
+            name: GenerateFlowFile1
+            class: org.apache.nifi.minifi.processors.GenerateFlowFile
+          - id: 00000000-0000-0000-0000-000000000002
+            name: LogAttribute
+            class: org.apache.nifi.minifi.processors.LogAttribute
+          Funnels:
+          - id: 00000000-0000-0000-0000-000000000003
+          - id: 99999999-9999-9999-9999-999999999999
+          Connections:
+          - id: 00000000-0000-0000-0000-000000000004
+            name: 00000000-0000-0000-0000-000000000003//LogAttribute
+            source id: 00000000-0000-0000-0000-000000000003
+            source relationship names: []
+            destination id: 00000000-0000-0000-0000-000000000002
+          - id: 00000000-0000-0000-0000-000000000005
+            name: GenerateFlowFile1/success/00000000-0000-0000-0000-000000000003
+            source id: 00000000-0000-0000-0000-000000000001
+            source relationship names:
+            - success
+            destination id: 00000000-0000-0000-0000-000000000003
+          Remote Process Groups:
+          - id: 00000000-0000-0000-0000-000000000006
+            name: ''
+            url: http://localhost:8080/nifi
+            transport protocol: RAW
+            Input Ports:
+            - id: 00000000-0000-0000-0000-000000000007
+              name: test2
+              max concurrent tasks: 1
+              use compression: false
+            Output Ports: []
+          Controller Services:
+            - name: SSLContextService
+              id: 00000000-0000-0000-0000-000000000008
+              class: SSLContextService
+            )";
+
+      auto config_old = config_yaml;
+      utils::StringUtils::replaceAll(config_yaml, std::string("00000000-0000-0000-0000-00000000000") + i, "99999999-9999-9999-9999-999999999999");
+      std::istringstream config_yaml_stream(config_yaml);
+      REQUIRE_THROWS_WITH(yaml_config.getYamlRoot(config_yaml_stream), "General Operation: UUID 99999999-9999-9999-9999-999999999999 is duplicated in the flow configuration");
+    }
+  }
+}
diff --git a/libminifi/include/core/yaml/CheckRequiredField.h b/libminifi/include/core/yaml/CheckRequiredField.h
index e6e1b500f..0967a723b 100644
--- a/libminifi/include/core/yaml/CheckRequiredField.h
+++ b/libminifi/include/core/yaml/CheckRequiredField.h
@@ -53,9 +53,9 @@ std::string buildErrorMessage(const YAML::Node &yaml_node, const std::vector<std
  *                               not present in 'yaml_node'
  */
 void checkRequiredField(
-    const YAML::Node &yaml_node, std::string_view field_name, std::string_view yaml_section = "", std::string error_message = "");
+    const YAML::Node &yaml_node, std::string_view field_name, std::string_view yaml_section = "", std::string_view error_message = "");
 
-std::string getRequiredField(const YAML::Node &yaml_node, const std::vector<std::string> &alternate_names, std::string_view yaml_section, std::string error_message = {});
+std::string getRequiredField(const YAML::Node &yaml_node, const std::vector<std::string> &alternate_names, std::string_view yaml_section, std::string_view error_message = {});
 
 }  // namespace yaml
 }  // namespace core
diff --git a/libminifi/include/core/yaml/YamlConfiguration.h b/libminifi/include/core/yaml/YamlConfiguration.h
index dd6e163ba..0430092c3 100644
--- a/libminifi/include/core/yaml/YamlConfiguration.h
+++ b/libminifi/include/core/yaml/YamlConfiguration.h
@@ -21,6 +21,7 @@
 #include <memory>
 #include <optional>
 #include <string>
+#include <unordered_set>
 
 #include "core/FlowConfiguration.h"
 #include "core/logging/LoggerConfiguration.h"
@@ -277,6 +278,7 @@ class YamlConfiguration : public FlowConfiguration {
    * @return         the parsed or generated UUID string
    */
   std::string getOrGenerateId(const YAML::Node& yamlNode, const std::string& idField = "id");
+  std::string getRequiredIdField(const YAML::Node& yaml_node, std::string_view yaml_section = "", std::string_view error_message = "");
 
   /**
    * This is a helper function for getting an optional value, if it exists.
@@ -302,9 +304,11 @@ class YamlConfiguration : public FlowConfiguration {
   void parsePropertyValueSequence(const std::string& propertyName, const YAML::Node& propertyValueNode, core::ConfigurableComponent& processor);
   void parseSingleProperty(const std::string& propertyName, const YAML::Node& propertyValueNode, core::ConfigurableComponent& processor);
   void parsePropertyNodeElement(const std::string& propertyName, const YAML::Node& propertyValueNode, core::ConfigurableComponent& processor);
+  void addNewId(const std::string& uuid);
 
   std::shared_ptr<logging::Logger> logger_;
   static std::shared_ptr<utils::IdGenerator> id_generator_;
+  std::unordered_set<std::string> uuids_;
 
   /**
    * Raises a human-readable configuration error for the given configuration component/section.
diff --git a/libminifi/src/core/yaml/CheckRequiredField.cpp b/libminifi/src/core/yaml/CheckRequiredField.cpp
index 493e301c5..bb5b7abf8 100644
--- a/libminifi/src/core/yaml/CheckRequiredField.cpp
+++ b/libminifi/src/core/yaml/CheckRequiredField.cpp
@@ -51,25 +51,25 @@ std::string buildErrorMessage(const YAML::Node &yaml_node, const std::vector<std
   return err_msg;
 }
 
-void checkRequiredField(const YAML::Node &yaml_node, std::string_view field_name, std::string_view yaml_section, std::string error_message) {
+void checkRequiredField(const YAML::Node &yaml_node, std::string_view field_name, std::string_view yaml_section, std::string_view error_message) {
   if (!isFieldPresent(yaml_node, field_name)) {
     if (error_message.empty()) {
-      error_message = buildErrorMessage(yaml_node, std::vector<std::string>{std::string(field_name)}, yaml_section);
+      throw std::invalid_argument(buildErrorMessage(yaml_node, std::vector<std::string>{std::string(field_name)}, yaml_section));
     }
-    throw std::invalid_argument(error_message);
+    throw std::invalid_argument(error_message.data());
   }
 }
 
-std::string getRequiredField(const YAML::Node &yaml_node, const std::vector<std::string> &alternate_names, std::string_view yaml_section, std::string error_message) {
+std::string getRequiredField(const YAML::Node &yaml_node, const std::vector<std::string> &alternate_names, std::string_view yaml_section, std::string_view error_message) {
   for (const auto& name : alternate_names) {
     if (yaml::isFieldPresent(yaml_node, name)) {
       return yaml_node[name].as<std::string>();
     }
   }
   if (error_message.empty()) {
-    error_message = buildErrorMessage(yaml_node, alternate_names, yaml_section);
+    throw std::invalid_argument(buildErrorMessage(yaml_node, alternate_names, yaml_section));
   }
-  throw std::invalid_argument(error_message);
+  throw std::invalid_argument(error_message.data());
 }
 
 }  // namespace yaml
diff --git a/libminifi/src/core/yaml/YamlConfiguration.cpp b/libminifi/src/core/yaml/YamlConfiguration.cpp
index 299c7c8f9..6dc6537ab 100644
--- a/libminifi/src/core/yaml/YamlConfiguration.cpp
+++ b/libminifi/src/core/yaml/YamlConfiguration.cpp
@@ -124,23 +124,24 @@ std::unique_ptr<core::ProcessGroup> YamlConfiguration::parseProcessGroupYaml(con
 }
 
 std::unique_ptr<core::ProcessGroup> YamlConfiguration::getYamlRoot(const YAML::Node& rootYamlNode) {
-    YAML::Node controllerServiceNode = rootYamlNode[CONFIG_YAML_CONTROLLER_SERVICES_KEY];
-    YAML::Node provenanceReportNode = rootYamlNode[CONFIG_YAML_PROVENANCE_REPORT_KEY];
+  uuids_.clear();
+  YAML::Node controllerServiceNode = rootYamlNode[CONFIG_YAML_CONTROLLER_SERVICES_KEY];
+  YAML::Node provenanceReportNode = rootYamlNode[CONFIG_YAML_PROVENANCE_REPORT_KEY];
 
-    parseControllerServices(controllerServiceNode);
-    // Create the root process group
-    std::unique_ptr<core::ProcessGroup> root = parseRootProcessGroupYaml(rootYamlNode);
-    parseProvenanceReportingYaml(provenanceReportNode, root.get());
+  parseControllerServices(controllerServiceNode);
+  // Create the root process group
+  std::unique_ptr<core::ProcessGroup> root = parseRootProcessGroupYaml(rootYamlNode);
+  parseProvenanceReportingYaml(provenanceReportNode, root.get());
 
-    // set the controller services into the root group.
-    for (const auto& controller_service : controller_services_->getAllControllerServices()) {
-      root->addControllerService(controller_service->getName(), controller_service);
-      root->addControllerService(controller_service->getUUIDStr(), controller_service);
-    }
-
-    return root;
+  // set the controller services into the root group.
+  for (const auto& controller_service : controller_services_->getAllControllerServices()) {
+    root->addControllerService(controller_service->getName(), controller_service);
+    root->addControllerService(controller_service->getUUIDStr(), controller_service);
   }
 
+  return root;
+}
+
 void YamlConfiguration::parseProcessorNodeYaml(const YAML::Node& processorsNode, core::ProcessGroup* parentGroup) {
   int64_t runDurationNanos = -1;
   utils::Identifier uuid;
@@ -505,7 +506,6 @@ void YamlConfiguration::parseControllerServices(const YAML::Node& controllerServ
     const auto controllerServiceNode = iter.as<YAML::Node>();
     try {
       yaml::checkRequiredField(controllerServiceNode, "name", CONFIG_YAML_CONTROLLER_SERVICES_KEY);
-      yaml::checkRequiredField(controllerServiceNode, "id", CONFIG_YAML_CONTROLLER_SERVICES_KEY);
 
       auto type = yaml::getRequiredField(controllerServiceNode, std::vector<std::string>{"class", "type"}, CONFIG_YAML_CONTROLLER_SERVICES_KEY);
       logger_->log_debug("Using type %s for controller service node", type);
@@ -518,7 +518,7 @@ void YamlConfiguration::parseControllerServices(const YAML::Node& controllerServ
       }
 
       auto name = controllerServiceNode["name"].as<std::string>();
-      auto id = controllerServiceNode["id"].as<std::string>();
+      auto id = getRequiredIdField(controllerServiceNode, CONFIG_YAML_CONTROLLER_SERVICES_KEY);
 
       utils::Identifier uuid;
       uuid = id;
@@ -595,13 +595,12 @@ void YamlConfiguration::parsePortYaml(const YAML::Node& portNode, core::ProcessG
   // Check for required fields
   yaml::checkRequiredField(inputPortsObj, "name", CONFIG_YAML_REMOTE_PROCESS_GROUP_KEY);
   auto nameStr = inputPortsObj["name"].as<std::string>();
-  yaml::checkRequiredField(inputPortsObj, "id", CONFIG_YAML_REMOTE_PROCESS_GROUP_KEY,
-                     "The field 'id' is required for "
-                         "the port named '" + nameStr + "' in the YAML Config. If this port "
-                         "is an input port for a NiFi Remote Process Group, the port "
-                         "id should match the corresponding id specified in the NiFi configuration. "
-                         "This is a UUID of the format XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.");
-  auto portId = inputPortsObj["id"].as<std::string>();
+  auto portId = getRequiredIdField(inputPortsObj, CONFIG_YAML_REMOTE_PROCESS_GROUP_KEY,
+    "The field 'id' is required for "
+    "the port named '" + nameStr + "' in the YAML Config. If this port "
+    "is an input port for a NiFi Remote Process Group, the port "
+    "id should match the corresponding id specified in the NiFi configuration. "
+    "This is a UUID of the format XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.");
   uuid = portId;
 
   auto port = std::make_unique<minifi::RemoteProcessorGroupPort>(
@@ -873,14 +872,21 @@ std::string YamlConfiguration::getOrGenerateId(const YAML::Node& yamlNode, const
   if (node[idField]) {
     if (YAML::NodeType::Scalar == node[idField].Type()) {
       id = node[idField].as<std::string>();
-    } else {
-      throw std::invalid_argument("getOrGenerateId: idField is expected to reference YAML::Node "
-                                  "of YAML::NodeType::Scalar.");
+      addNewId(id);
+      return id;
     }
-  } else {
-    id = id_generator_->generate().to_string();
-    logger_->log_debug("Generating random ID: id => [%s]", id);
+    throw std::invalid_argument("getOrGenerateId: idField is expected to reference YAML::Node of YAML::NodeType::Scalar.");
   }
+
+  id = id_generator_->generate().to_string();
+  logger_->log_debug("Generating random ID: id => [%s]", id);
+  return id;
+}
+
+std::string YamlConfiguration::getRequiredIdField(const YAML::Node& yaml_node, std::string_view yaml_section, std::string_view error_message) {
+  yaml::checkRequiredField(yaml_node, "id", yaml_section, error_message);
+  auto id = yaml_node["id"].as<std::string>();
+  addNewId(id);
   return id;
 }
 
@@ -908,6 +914,13 @@ YAML::Node YamlConfiguration::getOptionalField(const YAML::Node& yamlNode, const
   return result;
 }
 
+void YamlConfiguration::addNewId(const std::string& uuid) {
+  const auto [_, success] = uuids_.insert(uuid);
+  if (!success) {
+    throw Exception(ExceptionType::GENERAL_EXCEPTION, "UUID " + uuid + " is duplicated in the flow configuration");
+  }
+}
+
 } /* namespace core */
 } /* namespace minifi */
 } /* namespace nifi */
diff --git a/libminifi/test/resources/TestHTTPSiteToSite.yml b/libminifi/test/resources/TestHTTPSiteToSite.yml
index 339d8c6d2..5b6baf63c 100644
--- a/libminifi/test/resources/TestHTTPSiteToSite.yml
+++ b/libminifi/test/resources/TestHTTPSiteToSite.yml
@@ -14,7 +14,7 @@
 # limitations under the License.
 
 Flow Controller:
-    id: 471deef6-2a6e-4a7d-912a-81cc17e3a205
+    id: 471deef6-2a6e-4a7d-912a-81cc17e3a209
     name: MiNiFi Flow
 
 Processors:
@@ -47,7 +47,7 @@ Processors:
 Connections:
     - name: GenerateFlowFileS2S
       id: 471deef6-2a6e-4a7d-912a-81cc17e3a207
-      source id: 471deef6-2a6e-4a7d-912a-81cc17e3a206 
+      source id: 471deef6-2a6e-4a7d-912a-81cc17e3a206
       source relationship name: success
       destination id: 471deef6-2a6e-4a7d-912a-81cc17e3a204
       max work queue size: 0
@@ -55,8 +55,8 @@ Connections:
       flowfile expiration: 60 sec
       queue prioritizer class: org.apache.nifi.prioritizer.NewestFlowFileFirstPrioritizer
     - name: GenerateFlowFileS2S
-      id: 471deef6-2a6e-4a7d-912a-81cc17e3a207
-      source id: 471deef6-2a6e-4a7d-912a-81cc17e3a203 
+      id: 471deef6-2a6e-4a7d-912a-81cc17e3a210
+      source id: 471deef6-2a6e-4a7d-912a-81cc17e3a203
       source relationship name: success
       destination id: 471deef6-2a6e-4a7d-912a-81cc17e3a205
       max work queue size: 0