You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by ab...@apache.org on 2021/04/23 17:13:07 UTC

[nifi-minifi-cpp] 01/02: MINIFICPP-1032 Refactor parse_url

This is an automated email from the ASF dual-hosted git repository.

aboda pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi-minifi-cpp.git

commit 870249f713219cce57f0719efa3e14c6e25e15bf
Author: Ferenc Gerlits <fg...@gmail.com>
AuthorDate: Tue Apr 6 12:51:29 2021 +0200

    MINIFICPP-1032 Refactor parse_url
    
    Modernize the parse_url() function and add error checking.
    
    Signed-off-by: Arpad Boda <ab...@apache.org>
    
    This closes #1046
---
 libminifi/include/RemoteProcessorGroupPort.h |  22 ++--
 libminifi/include/utils/HTTPClient.h         |  24 ++++-
 libminifi/src/c2/C2Agent.cpp                 |  16 ++-
 libminifi/src/utils/HTTPClient.cpp           | 155 ++++++++++++++++++---------
 libminifi/test/unit/HTTPUtilTests.cpp        |  97 ++++++++++-------
 5 files changed, 198 insertions(+), 116 deletions(-)

diff --git a/libminifi/include/RemoteProcessorGroupPort.h b/libminifi/include/RemoteProcessorGroupPort.h
index 3c33706..8598103 100644
--- a/libminifi/include/RemoteProcessorGroupPort.h
+++ b/libminifi/include/RemoteProcessorGroupPort.h
@@ -146,23 +146,17 @@ class RemoteProcessorGroupPort : public core::Processor {
    */
   void setURL(std::string val) {
     auto urls = utils::StringUtils::split(val, ",");
-    for (auto url : urls) {
-      logger_->log_trace("Parsing %s", url);
-      std::string host, protocol;
-      int port = -1;
-      url = utils::StringUtils::trim(url);
-      utils::parse_url(&url, &host, &port, &protocol);
-      logger_->log_trace("Parsed -%s- %s %s, %d", url, protocol, host, port);
-      if (port == -1) {
-        if (protocol.find("https") != std::string::npos) {
-          port = 443;
-        } else if (protocol.find("http") != std::string::npos) {
-          port = 80;
-        }
+    for (const auto& url : urls) {
+      utils::URL parsed_url{utils::StringUtils::trim(url)};
+      if (parsed_url.isValid()) {
+        logger_->log_debug("Parsed RPG URL '%s' -> '%s'", url, parsed_url.hostPort());
+        nifi_instances_.push_back({parsed_url.host(), parsed_url.port(), parsed_url.protocol()});
+      } else {
+        logger_->log_error("Could not parse RPG URL '%s'", url);
       }
-      nifi_instances_.push_back({ host, port, protocol });
     }
   }
+
   void setHTTPProxy(const utils::HTTPProxy &proxy) {
     this->proxy_ = proxy;
   }
diff --git a/libminifi/include/utils/HTTPClient.h b/libminifi/include/utils/HTTPClient.h
index 82aa55b..425c716 100644
--- a/libminifi/include/utils/HTTPClient.h
+++ b/libminifi/include/utils/HTTPClient.h
@@ -36,6 +36,7 @@
 #include "controllers/SSLContextService.h"
 #include "core/Deprecated.h"
 #include "utils/gsl.h"
+#include "utils/OptionalUtils.h"
 
 namespace org {
 namespace apache {
@@ -362,10 +363,27 @@ class BaseHTTPClient {
   virtual inline bool matches(const std::string &value, const std::string &sregex) = 0;
 };
 
-extern std::string get_token(utils::BaseHTTPClient *client, std::string username, std::string password);
+std::string get_token(utils::BaseHTTPClient *client, std::string username, std::string password);
+
+class URL {
+ public:
+  explicit URL(const std::string& url_input);
+  bool isValid() const { return is_valid_; }
+  std::string protocol() const { return protocol_; }
+  std::string host() const { return host_; }
+  int port() const;
+  std::string hostPort() const;
+  std::string toString() const;
+
+ private:
+  std::string protocol_;
+  std::string host_;
+  utils::optional<int> port_;
+  utils::optional<std::string> path_;
+  bool is_valid_ = false;
+  std::shared_ptr<logging::Logger> logger_ = logging::LoggerFactory<URL>::getLogger();
+};
 
-extern void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol);
-extern void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol, std::string *path, std::string *query);
 }  // namespace utils
 }  // namespace minifi
 }  // namespace nifi
diff --git a/libminifi/src/c2/C2Agent.cpp b/libminifi/src/c2/C2Agent.cpp
index 0914ed5..9c914cc 100644
--- a/libminifi/src/c2/C2Agent.cpp
+++ b/libminifi/src/c2/C2Agent.cpp
@@ -760,22 +760,20 @@ utils::optional<std::string> C2Agent::fetchFlow(const std::string& uri) const {
     std::stringstream adjusted_url;
     std::string base;
     if (configuration_->get(minifi::Configure::nifi_c2_flow_base_url, base)) {
+      base = utils::StringUtils::trim(base);
       adjusted_url << base;
       if (!utils::StringUtils::endsWith(base, "/")) {
         adjusted_url << "/";
       }
       adjusted_url << uri;
       resolved_url = adjusted_url.str();
-    } else if (configuration_->get("c2.rest.url", base)) {
-      std::string host, protocol;
-      int port = -1;
-      utils::parse_url(&base, &host, &port, &protocol);
-      adjusted_url << protocol << host;
-      if (port > 0) {
-        adjusted_url << ":" << port;
+    } else if (configuration_->get("nifi.c2.rest.url", "c2.rest.url", base)) {
+      utils::URL base_url{utils::StringUtils::trim(base)};
+      if (!base_url.isValid()) {
+        logger_->log_error("Could not parse C2 REST URL '%s'", base);
+        return {};
       }
-      adjusted_url << "/c2/api/" << uri;
-      resolved_url = adjusted_url.str();
+      resolved_url = base_url.hostPort() + "/c2/api/" + uri;
     }
   }
 
diff --git a/libminifi/src/utils/HTTPClient.cpp b/libminifi/src/utils/HTTPClient.cpp
index 3328aa5..fe9d702 100644
--- a/libminifi/src/utils/HTTPClient.cpp
+++ b/libminifi/src/utils/HTTPClient.cpp
@@ -15,8 +15,43 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "utils/HTTPClient.h"
+#include <algorithm>
 #include <string>
+
+#include "utils/HTTPClient.h"
+#include "utils/StringUtils.h"
+
+namespace {
+
+constexpr const char* HTTP = "http://";
+constexpr const char* HTTPS = "https://";
+
+utils::optional<std::string> parseProtocol(const std::string& url_input) {
+  if (utils::StringUtils::startsWith(url_input, HTTP)) {
+    return HTTP;
+  } else if (utils::StringUtils::startsWith(url_input, HTTPS)) {
+    return HTTPS;
+  } else {
+    return {};
+  }
+}
+
+utils::optional<int> parsePortNumber(const std::string& port_string) {
+  try {
+    size_t pos;
+    int port = std::stoi(port_string, &pos);
+    if (pos == port_string.size()) {
+      return port;
+    }
+  } catch (const std::invalid_argument&) {
+  } catch (const std::out_of_range&) {
+  }
+
+  return {};
+}
+
+}  // namespace
+
 namespace org {
 namespace apache {
 namespace nifi {
@@ -48,72 +83,86 @@ std::string get_token(utils::BaseHTTPClient *client, std::string username, std::
   return token;
 }
 
-void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol) {
-  static std::string http("http://");
-  static std::string https("https://");
-
-  if (url->compare(0, http.size(), http) == 0)
-    *protocol = http;
-
-  if (url->compare(0, https.size(), https) == 0)
-    *protocol = https;
-
-  if (!protocol->empty()) {
-    size_t pos = url->find_first_of(":", protocol->size());
+URL::URL(const std::string& url_input) {
+  const auto protocol = parseProtocol(url_input);
+  if (protocol) {
+    protocol_ = *protocol;
+  } else {
+    logger_->log_error("Unknown protocol in URL '%s'", url_input);
+    return;
+  }
 
-    if (pos == std::string::npos) {
-      pos = url->size();
-    }
+  std::string::const_iterator current_pos = url_input.begin();
+  std::advance(current_pos, protocol_.size());
 
-    *host = url->substr(protocol->size(), pos - protocol->size());
-
-    if (pos < url->size() && (*url)[pos] == ':') {
-      size_t ppos = url->find_first_of("/", pos);
-      if (ppos == std::string::npos) {
-        ppos = url->size();
-      }
-      std::string portStr(url->substr(pos + 1, ppos - pos - 1));
-      if (portStr.size() > 0) {
-        *port = std::stoi(portStr);
-      }
+  constexpr const char HOST_TERMINATORS[] = ":/?#";
+  std::string::const_iterator end_of_host = std::find_first_of(current_pos, url_input.end(), std::begin(HOST_TERMINATORS), std::end(HOST_TERMINATORS));
+  host_ = std::string{current_pos, end_of_host};
+  if (host_.empty()) {
+    logger_->log_error("No host found in URL '%s'", url_input);
+    return;
+  }
+  current_pos = end_of_host;
+
+  if (current_pos != url_input.end() && *current_pos == ':') {
+    constexpr const char PORT_TERMINATORS[] = "/?#";
+    ++current_pos;
+    std::string::const_iterator end_of_port = std::find_first_of(current_pos, url_input.end(), std::begin(PORT_TERMINATORS), std::end(PORT_TERMINATORS));
+    const auto port_number = parsePortNumber(std::string{current_pos, end_of_port});
+    if (port_number) {
+      port_ = *port_number;
     } else {
-      // In case the host contains no port, the first part is needed only
-      // For eg.: nifi.io/nifi
-      size_t ppos = host->find_first_of("/");
-      if (ppos != std::string::npos) {
-        *host = host->substr(0, ppos);
-      }
+      logger_->log_error("Could not parse the port number in URL '%s'", url_input);
+      return;
     }
+    current_pos = end_of_port;
+  }
+
+  if (current_pos != url_input.end()) {
+    path_ = std::string{current_pos, url_input.end()};
   }
-}
 
-void parse_url(const std::string *url, std::string *host, int *port, std::string *protocol, std::string *path, std::string *query) {
-  int temp_port = -1;
+  is_valid_ = true;
+}
 
-  parse_url(url, host, &temp_port, protocol);
+int URL::port() const {
+  if (port_) {
+    return *port_;
+  } else if (protocol_ == HTTP) {
+    return 80;
+  } else if (protocol_ == HTTPS) {
+    return 443;
+  } else {
+    throw std::logic_error{"Undefined port in URL: " + toString()};
+  }
+}
 
-  if (host->empty() || protocol->empty()) {
-    return;
+std::string URL::hostPort() const {
+  if (!isValid()) {
+    return "INVALID";
   }
 
-  size_t base_len = host->size() + protocol->size();
-  if (temp_port != -1) {
-    *port = temp_port;
-    base_len += std::to_string(temp_port).size() + 1;  // +1 for the :
+  if (port_) {
+    return protocol_ + host_ + ':' + std::to_string(*port_);
+  } else {
+    return protocol_ + host_;
   }
+}
 
-  auto query_loc = url->find_first_of("?", base_len);
+std::string URL::toString() const {
+  if (!isValid()) {
+    return "INVALID";
+  }
 
-  if (query_loc < url->size()) {
-    *path = url->substr(base_len + 1, query_loc - base_len - 1);
-    *query = url->substr(query_loc + 1, url->size() - query_loc - 1);
+  if (path_) {
+    return hostPort() + *path_;
   } else {
-    *path = url->substr(base_len + 1, url->size() - base_len - 1);
+    return hostPort();
   }
 }
 
-} /* namespace utils */
-} /* namespace minifi */
-} /* namespace nifi */
-} /* namespace apache */
-} /* namespace org */
+}  // namespace utils
+}  // namespace minifi
+}  // namespace nifi
+}  // namespace apache
+}  // namespace org
diff --git a/libminifi/test/unit/HTTPUtilTests.cpp b/libminifi/test/unit/HTTPUtilTests.cpp
index e3b7c45..dfb04f4 100644
--- a/libminifi/test/unit/HTTPUtilTests.cpp
+++ b/libminifi/test/unit/HTTPUtilTests.cpp
@@ -17,51 +17,74 @@
  */
 
 #include <string>
-#include <iostream>
 #include "../TestBase.h"
 #include "utils/HTTPClient.h"
 
-TEST_CASE("TestHTTPUtils::simple", "[test parse no port]") {
-  std::string protocol, host;
-  int port = -1;
-  std::string url = "http://nifi.io/nifi";
-  minifi::utils::parse_url(&url, &host, &port, &protocol);
-  REQUIRE(port == -1);
-  REQUIRE(host == "nifi.io");
-  REQUIRE(protocol == "http://");
+TEST_CASE("The URL class can parse various URL strings", "[URL][parsing]") {
+  const auto canParseURL = [](const std::string& url_string) { return utils::URL{url_string}.toString() == url_string; };
+
+  REQUIRE(canParseURL("http://nifi.io"));
+  REQUIRE(canParseURL("http://nifi.io:777"));
+  REQUIRE(canParseURL("http://nifi.io/nifi"));
+  REQUIRE(canParseURL("https://nifi.somewhere.far.away:321/nifi"));
+  REQUIRE(canParseURL("http://nifi.io?what_is_love"));
+  REQUIRE(canParseURL("http://nifi.io:123?what_is_love"));
+  REQUIRE(canParseURL("https://nifi.io/nifi_path?what_is_love"));
+  REQUIRE(canParseURL("http://nifi.io:4321/nifi_path?what_is_love"));
+  REQUIRE(canParseURL("http://nifi.io#anchors_aweigh"));
+  REQUIRE(canParseURL("https://nifi.io:123#anchors_aweigh"));
+  REQUIRE(canParseURL("http://nifi.io/nifi_path#anchors_aweigh"));
+  REQUIRE(canParseURL("https://nifi.io:4321/nifi_path#anchors_aweigh"));
 }
 
-TEST_CASE("TestHTTPUtils::urlwithport", "[test parse with port]") {
-  std::string protocol, host;
-  int port = -1;
-  std::string url = "https://nifi.somewhere.far.away:321/nifi";
-  minifi::utils::parse_url(&url, &host, &port, &protocol);
-  REQUIRE(port == 321);
-  REQUIRE(host == "nifi.somewhere.far.away");
-  REQUIRE(protocol == "https://");
+TEST_CASE("The URL class will fail to parse invalid URL strings", "[URL][parsing]") {
+  const auto failToParseURL = [](const std::string& url_string) { return utils::URL{url_string}.toString() == "INVALID"; };
+
+  REQUIRE(failToParseURL("mailto:santa.claus@north.pole.org"));
+  REQUIRE(failToParseURL("http:nifi.io"));
+  REQUIRE(failToParseURL("http://"));
+  REQUIRE(failToParseURL("http://:123"));
+  REQUIRE(failToParseURL("http://nifi.io:0x50"));
+  REQUIRE(failToParseURL("http://nifi.io:port_number"));
 }
 
-TEST_CASE("TestHTTPUtils::query", "[test parse query without port]") {
-  std::string protocol, host, path, query;
-  int port = -1;
-  std::string url = "https://nifi.io/nifi/path?what";
-  minifi::utils::parse_url(&url, &host, &port, &protocol, &path, &query);
-  REQUIRE(port == -1);
-  REQUIRE(host == "nifi.io");
-  REQUIRE(protocol == "https://");
-  REQUIRE(path == "nifi/path");
-  REQUIRE(query == "what");
+TEST_CASE("The URL class can extract the port from URL strings", "[URL][port]") {
+  REQUIRE(utils::URL{"http://nifi.io"}.port() == 80);
+  REQUIRE(utils::URL{"http://nifi.io/nifi"}.port() == 80);
+  REQUIRE(utils::URL{"https://nifi.io"}.port() == 443);
+  REQUIRE(utils::URL{"https://nifi.io/nifi"}.port() == 443);
+  REQUIRE(utils::URL{"http://nifi.io:123"}.port() == 123);
+  REQUIRE(utils::URL{"http://nifi.io:123/nifi"}.port() == 123);
+  REQUIRE(utils::URL{"https://nifi.io:456"}.port() == 456);
+  REQUIRE(utils::URL{"https://nifi.io:456/nifi"}.port() == 456);
 }
 
-TEST_CASE("TestHTTPUtils::querywithport", "[test parse query with port]") {
-  std::string protocol, host, path, query;
-  int port = -1;
-  std::string url = "http://nifi.io:4321/nifi_path?what_is_love";
-  minifi::utils::parse_url(&url, &host, &port, &protocol, &path, &query);
-  REQUIRE(port == 4321);
-  REQUIRE(host == "nifi.io");
-  REQUIRE(protocol == "http://");
-  REQUIRE(path == "nifi_path");
-  REQUIRE(query == "what_is_love");
+TEST_CASE("The URL class can extract the host", "[URL][host]") {
+  REQUIRE(utils::URL{"http://nifi.io"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io:777"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io/nifi"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"https://nifi.somewhere.far.away:321/nifi"}.host() == "nifi.somewhere.far.away");
+  REQUIRE(utils::URL{"http://nifi.io?what_is_love"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io:123?what_is_love"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"https://nifi.io/nifi_path?what_is_love"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io:4321/nifi_path?what_is_love"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io#anchors_aweigh"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"https://nifi.io:123#anchors_aweigh"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io/nifi_path#anchors_aweigh"}.host() == "nifi.io");
+  REQUIRE(utils::URL{"https://nifi.io:4321/nifi_path#anchors_aweigh"}.host() == "nifi.io");
 }
 
+TEST_CASE("The URL class can extract the host and port", "[URL][hostPort]") {
+  REQUIRE(utils::URL{"http://nifi.io"}.hostPort() == "http://nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io:777"}.hostPort() == "http://nifi.io:777");
+  REQUIRE(utils::URL{"http://nifi.io/nifi"}.hostPort() == "http://nifi.io");
+  REQUIRE(utils::URL{"https://nifi.somewhere.far.away:321/nifi"}.hostPort() == "https://nifi.somewhere.far.away:321");
+  REQUIRE(utils::URL{"http://nifi.io?what_is_love"}.hostPort() == "http://nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io:123?what_is_love"}.hostPort() == "http://nifi.io:123");
+  REQUIRE(utils::URL{"https://nifi.io/nifi_path?what_is_love"}.hostPort() == "https://nifi.io");
+  REQUIRE(utils::URL{"http://nifi.io:4321/nifi_path?what_is_love"}.hostPort() == "http://nifi.io:4321");
+  REQUIRE(utils::URL{"http://nifi.io#anchors_aweigh"}.hostPort() == "http://nifi.io");
+  REQUIRE(utils::URL{"https://nifi.io:123#anchors_aweigh"}.hostPort() == "https://nifi.io:123");
+  REQUIRE(utils::URL{"http://nifi.io/nifi_path#anchors_aweigh"}.hostPort() == "http://nifi.io");
+  REQUIRE(utils::URL{"https://nifi.io:4321/nifi_path#anchors_aweigh"}.hostPort() == "https://nifi.io:4321");
+}