You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by al...@apache.org on 2018/04/09 20:10:22 UTC

nifi-minifi-cpp git commit: MINIFICPP-445 Added escape/unescape CSV expression language functions

Repository: nifi-minifi-cpp
Updated Branches:
  refs/heads/master 2e4a3521e -> 253a1b74f


MINIFICPP-445 Added escape/unescape CSV expression language functions

This closes #293.

Signed-off-by: Aldrin Piri <al...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/253a1b74
Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/253a1b74
Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/253a1b74

Branch: refs/heads/master
Commit: 253a1b74f842479b7fa25fdca76b8e83a5e7a059
Parents: 2e4a352
Author: Andrew I. Christianson <an...@andyic.org>
Authored: Fri Mar 23 12:49:51 2018 -0400
Committer: Aldrin Piri <al...@apache.org>
Committed: Mon Apr 9 16:09:17 2018 -0400

----------------------------------------------------------------------
 EXPRESSIONS.md                                  | 80 +++++++++++++++++++-
 extensions/expression-language/Expression.cpp   | 55 ++++++++++++++
 libminifi/include/utils/StringUtils.h           | 19 ++++-
 .../ExpressionLanguageTests.cpp                 | 36 ++++++++-
 4 files changed, 181 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/EXPRESSIONS.md
----------------------------------------------------------------------
diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md
index 371ddb4..4a72dae 100644
--- a/EXPRESSIONS.md
+++ b/EXPRESSIONS.md
@@ -204,6 +204,10 @@ token, filename.
 
 - [`escapeJson`](#escapejson)
 - [`unescapeJson`](#unescapejson)
+- [`escapeXml`](#escapexml)
+- [`escapeCsv`](#escapecsv)
+- [`unescapeXml`](#unescapexml)
+- [`unescapeCsv`](#unescapecsv)
 
 ## Planned Features
 
@@ -221,12 +225,8 @@ token, filename.
 
 ### Encode/Decode Functions
 
-- `escapeXml`
-- `escapeCsv`
 - `escapeHtml3`
 - `escapeHtml4`
-- `unescapeXml`
-- `unescapeCsv`
 - `unescapeHtml3`
 - `unescapeHtml4`
 - `urlEncode`
@@ -1266,3 +1266,75 @@ If the "message" attribute is 'This is a "test!"', then the Expression
 
 If the "message" attribute is 'This is a \"test!\"', then the Expression
 `${message:unescapeJson()}` will return 'This is a "test!"'
+
+### escapeXml
+
+**Description**: This function prepares the Subject to be inserted into an XML
+document by escaping the characters in a String using XML entities. The
+function correctly escapes quotes, apostrophe, ampersand, `<`, `>` and
+control-chars.
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `Zero > One < "two!" & 'true'`, then the
+Expression `${message:escapeXml()}` will return `Zero &gt; One &lt;
+&quot;two!&quot; &amp; &apos;true&apos;`
+
+### unescapeXml
+
+**Description**: This function unescapes a string containing XML entity escapes
+to a string containing the actual Unicode characters corresponding to the
+escapes. Supports only the five basic XML entities (gt, lt, quot, amp, apos).
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `Zero &gt; One &lt; &quot;two!&quot; &amp;
+&apos;true&apos;`, then the Expression `${message:unescapeXml()}` will return
+`Zero > One < "two!" & 'true'`
+
+### escapeCsv
+
+**Description**: This function prepares the Subject to be inserted into a CSV
+document by escaping the characters in a String using the rules in RFC 4180.
+The function correctly escapes quotes and surrounds the string in quotes if
+needed.
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `Zero > One < "two!" & 'true'`, then the
+Expression `${message:escapeCsv()}` will return `"Zero > One < ""two!"" &
+'true'"`
+
+### unescapeCsv
+
+**Description**: This function unescapes a String from a CSV document according
+to the rules of RFC 4180.
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `"Zero > One < ""two!"" & 'true'"`, then the
+Expression `${message:unescapeCsv()}` will return `Zero > One < "two!" & 'true'`

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/extensions/expression-language/Expression.cpp
----------------------------------------------------------------------
diff --git a/extensions/expression-language/Expression.cpp b/extensions/expression-language/Expression.cpp
index a4ee41b..cd5199b 100644
--- a/extensions/expression-language/Expression.cpp
+++ b/extensions/expression-language/Expression.cpp
@@ -222,6 +222,57 @@ Value expr_unescapeXml(const std::vector<Value> &args) {
       }));
 }
 
+Value expr_escapeCsv(const std::vector<Value> &args) {
+  auto result = args[0].asString();
+  const char quote_req_chars[] = {'"', '\r', '\n', ','};
+  bool quote_required = false;
+
+  for (const auto &c : quote_req_chars) {
+    if (result.find(c) != std::string::npos) {
+      quote_required = true;
+      break;
+    }
+  }
+
+  if (quote_required) {
+    std::string quoted_result = "\"";
+    quoted_result.append(utils::StringUtils::replaceMap(result, {{"\"", "\"\""}}));
+    quoted_result.append("\"");
+    return Value(quoted_result);
+  }
+
+  return Value(result);
+}
+
+Value expr_unescapeCsv(const std::vector<Value> &args) {
+  auto result = args[0].asString();
+
+  if (result[0] == '"' && result[result.size() - 1] == '"') {
+    bool quote_required = false;
+
+    size_t quote_pos = result.find('"', 1);
+
+    if (quote_pos != result.length() - 1) {
+      quote_required = true;
+    } else {
+      const char quote_req_chars[] = {'\r', '\n', ','};
+
+      for (const auto &c : quote_req_chars) {
+        if (result.find(c) != std::string::npos) {
+          quote_required = true;
+          break;
+        }
+      }
+    }
+
+    if (quote_required) {
+      return Value(utils::StringUtils::replaceMap(result.substr(1, result.size() - 2), {{"\"\"", "\""}}));
+    }
+  }
+
+  return Value(result);
+}
+
 #ifdef EXPRESSION_LANGUAGE_USE_REGEX
 
 Value expr_replace(const std::vector<Value> &args) {
@@ -555,6 +606,10 @@ Expression make_dynamic_function(const std::string &function_name,
     return make_dynamic_function_incomplete<expr_escapeXml>(function_name, args, 0);
   } else if (function_name == "unescapeXml") {
     return make_dynamic_function_incomplete<expr_unescapeXml>(function_name, args, 0);
+  } else if (function_name == "escapeCsv") {
+    return make_dynamic_function_incomplete<expr_escapeCsv>(function_name, args, 0);
+  } else if (function_name == "unescapeCsv") {
+    return make_dynamic_function_incomplete<expr_unescapeCsv>(function_name, args, 0);
 #ifdef EXPRESSION_LANGUAGE_USE_REGEX
   } else if (function_name == "replace") {
     return make_dynamic_function_incomplete<expr_replace>(function_name, args, 2);

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/libminifi/include/utils/StringUtils.h
----------------------------------------------------------------------
diff --git a/libminifi/include/utils/StringUtils.h b/libminifi/include/utils/StringUtils.h
index 4bedce7..7f33260 100644
--- a/libminifi/include/utils/StringUtils.h
+++ b/libminifi/include/utils/StringUtils.h
@@ -210,15 +210,28 @@ class StringUtils {
   }
   
   static std::string replaceMap(std::string source_string, const std::map<std::string, std::string> &replace_map) {
+    auto result_string = source_string;
+
+    std::vector<std::pair<size_t, std::pair<size_t, std::string>>> replacements;
     for (const auto &replace_pair : replace_map) {
       size_t replace_pos = 0;
       while ((replace_pos = source_string.find(replace_pair.first, replace_pos)) != std::string::npos) {
-        source_string.replace(replace_pos, replace_pair.first.length(), replace_pair.second);
-        replace_pos += replace_pair.second.length();
+        replacements.emplace_back(std::make_pair(replace_pos,
+                                                 std::make_pair(replace_pair.first.length(), replace_pair.second)));
+        replace_pos += replace_pair.first.length();
       }
     }
 
-    return source_string;
+    std::sort(replacements.begin(), replacements.end(), [](const std::pair<size_t, std::pair<size_t, std::string>> a,
+                                                           const std::pair<size_t, std::pair<size_t, std::string>> &b) {
+      return a.first > b.first;
+    });
+
+    for (const auto &replacement : replacements) {
+      result_string = source_string.replace(replacement.first, replacement.second.first, replacement.second.second);
+    }
+
+    return result_string;
   }
 
 };

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
----------------------------------------------------------------------
diff --git a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
index a198335..12c60ca 100644
--- a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
+++ b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
@@ -1046,14 +1046,14 @@ TEST_CASE("Encode XML", "[expressionEncodeXML]") {  // NOLINT
 
   auto flow_file_a = std::make_shared<MockFlowFile>();
   flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'");
-  REQUIRE("Zero &gt; One &lt; &amp;quot;two!&amp;quot; &amp; &apos;true&apos;" == expr({flow_file_a}).asString());
+  REQUIRE("Zero &gt; One &lt; &quot;two!&quot; &amp; &apos;true&apos;" == expr({flow_file_a}).asString());
 }
 
 TEST_CASE("Decode XML", "[expressionDecodeXML]") {  // NOLINT
   auto expr = expression::compile("${message:unescapeXml()}");
 
   auto flow_file_a = std::make_shared<MockFlowFile>();
-  flow_file_a->addAttribute("message", "Zero &gt; One &lt; &amp;quot;two!&amp;quot; &amp; &apos;true&apos;");
+  flow_file_a->addAttribute("message", "Zero &gt; One &lt; &quot;two!&quot; &amp; &apos;true&apos;");
   REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString());
 }
 
@@ -1064,3 +1064,35 @@ TEST_CASE("Encode Decode XML", "[expressionEncodeDecodeXML]") {  // NOLINT
   flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'");
   REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString());
 }
+
+TEST_CASE("Encode CSV", "[expressionEncodeCSV]") {  // NOLINT
+  auto expr = expression::compile("${message:escapeCsv()}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'");
+  REQUIRE("\"Zero > One < \"\"two!\"\" & 'true'\"" == expr({flow_file_a}).asString());
+}
+
+TEST_CASE("Decode CSV", "[expressionDecodeCSV]") {  // NOLINT
+  auto expr = expression::compile("${message:unescapeCsv()}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("message", R"("Zero > One < ""two!"" & 'true'")");
+  REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString());
+}
+
+TEST_CASE("Decode CSV 2", "[expressionDecodeCSV2]") {  // NOLINT
+  auto expr = expression::compile("${message:unescapeCsv()}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("message", R"("quoted")");
+  REQUIRE("\"quoted\"" == expr({flow_file_a}).asString());
+}
+
+TEST_CASE("Encode Decode CSV", "[expressionEncodeDecodeCSV]") {  // NOLINT
+  auto expr = expression::compile("${message:escapeCsv():unescapeCsv()}");
+
+  auto flow_file_a = std::make_shared<MockFlowFile>();
+  flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'");
+  REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString());
+}