You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by al...@apache.org on 2018/04/09 20:10:22 UTC
nifi-minifi-cpp git commit: MINIFICPP-445 Added escape/unescape CSV
expression language functions
Repository: nifi-minifi-cpp
Updated Branches:
refs/heads/master 2e4a3521e -> 253a1b74f
MINIFICPP-445 Added escape/unescape CSV expression language functions
This closes #293.
Signed-off-by: Aldrin Piri <al...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/253a1b74
Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/253a1b74
Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/253a1b74
Branch: refs/heads/master
Commit: 253a1b74f842479b7fa25fdca76b8e83a5e7a059
Parents: 2e4a352
Author: Andrew I. Christianson <an...@andyic.org>
Authored: Fri Mar 23 12:49:51 2018 -0400
Committer: Aldrin Piri <al...@apache.org>
Committed: Mon Apr 9 16:09:17 2018 -0400
----------------------------------------------------------------------
EXPRESSIONS.md | 80 +++++++++++++++++++-
extensions/expression-language/Expression.cpp | 55 ++++++++++++++
libminifi/include/utils/StringUtils.h | 19 ++++-
.../ExpressionLanguageTests.cpp | 36 ++++++++-
4 files changed, 181 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/EXPRESSIONS.md
----------------------------------------------------------------------
diff --git a/EXPRESSIONS.md b/EXPRESSIONS.md
index 371ddb4..4a72dae 100644
--- a/EXPRESSIONS.md
+++ b/EXPRESSIONS.md
@@ -204,6 +204,10 @@ token, filename.
- [`escapeJson`](#escapejson)
- [`unescapeJson`](#unescapejson)
+- [`escapeXml`](#escapexml)
+- [`escapeCsv`](#escapecsv)
+- [`unescapeXml`](#unescapexml)
+- [`unescapeCsv`](#unescapecsv)
## Planned Features
@@ -221,12 +225,8 @@ token, filename.
### Encode/Decode Functions
-- `escapeXml`
-- `escapeCsv`
- `escapeHtml3`
- `escapeHtml4`
-- `unescapeXml`
-- `unescapeCsv`
- `unescapeHtml3`
- `unescapeHtml4`
- `urlEncode`
@@ -1266,3 +1266,75 @@ If the "message" attribute is 'This is a "test!"', then the Expression
If the "message" attribute is 'This is a \"test!\"', then the Expression
`${message:unescapeJson()}` will return 'This is a "test!"'
+
+### escapeXml
+
+**Description**: This function prepares the Subject to be inserted into an XML
+document by escaping the characters in a String using XML entities. The
+function correctly escapes quotes, apostrophes, ampersands, `<`, `>` and
+control characters.
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `Zero > One < \"two!\" & 'true'`, then the
+Expression `${message:escapeXml()}` will return `Zero &gt; One &lt;
+&quot;two!&quot; &amp; &apos;true&apos;`
+
+### unescapeXml
+
+**Description**: This function unescapes a string containing XML entity escapes
+to a string containing the actual Unicode characters corresponding to the
+escapes. Supports only the five basic XML entities (gt, lt, quot, amp, apos).
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `Zero &gt; One &lt; &quot;two!&quot; &amp;
+&apos;true&apos;`, then the Expression `${message:unescapeXml()}` will return
+`Zero > One < \"two!\" & 'true'`
+
+### escapeCsv
+
+**Description**: This function prepares the Subject to be inserted into a CSV
+document by escaping the characters in a String using the rules in RFC 4180.
+The function correctly escapes quotes and surrounds the string in quotes if
+needed.
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `Zero > One < "two!" & 'true'`, then the
+Expression `${message:escapeCsv()}` will return `"Zero > One < ""two!"" &
+'true'"`
+
+### unescapeCsv
+
+**Description**: This function unescapes a String from a CSV document according
+to the rules of RFC 4180.
+
+**Subject Type**: String
+
+**Arguments**: No arguments
+
+**Return Type**: String
+
+**Examples**:
+
+If the "message" attribute is `"Zero > One < ""two!"" & 'true'"`, then the
+Expression `${message:unescapeCsv()}` will return `Zero > One < "two!" & 'true'`
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/extensions/expression-language/Expression.cpp
----------------------------------------------------------------------
diff --git a/extensions/expression-language/Expression.cpp b/extensions/expression-language/Expression.cpp
index a4ee41b..cd5199b 100644
--- a/extensions/expression-language/Expression.cpp
+++ b/extensions/expression-language/Expression.cpp
@@ -222,6 +222,57 @@ Value expr_unescapeXml(const std::vector<Value> &args) {
}));
}
+/**
+ * Escapes the subject (args[0]) so it can be used as a single CSV field.
+ *
+ * If the text contains a double quote, carriage return, line feed or comma,
+ * every embedded double quote is doubled and the whole value is wrapped in
+ * double quotes. Otherwise the text is returned unchanged.
+ */
+Value expr_escapeCsv(const std::vector<Value> &args) {
+  const auto subject = args[0].asString();
+
+  // Quoting is only needed when one of these four characters is present.
+  if (subject.find_first_of("\"\r\n,") == std::string::npos) {
+    return Value(subject);
+  }
+
+  std::string quoted = "\"";
+  quoted.append(utils::StringUtils::replaceMap(subject, {{"\"", "\"\""}}));
+  quoted.append("\"");
+  return Value(quoted);
+}
+
+/**
+ * Reverses CSV escaping of the subject (args[0]).
+ *
+ * The enclosing double quotes are removed, and doubled double quotes are
+ * collapsed into one, only when the value is wrapped in double quotes AND the
+ * text between them contains a double quote, carriage return, line feed or
+ * comma. Every other value is returned unchanged.
+ */
+Value expr_unescapeCsv(const std::vector<Value> &args) {
+  const auto subject = args[0].asString();
+
+  // An enclosed value needs both an opening and a closing quote. A lone '"'
+  // is its own first and last character and must not be treated as enclosed,
+  // otherwise it would be reduced to an empty string.
+  if (subject.size() < 2 || subject.front() != '"' || subject.back() != '"') {
+    return Value(subject);
+  }
+
+  const auto inner = subject.substr(1, subject.size() - 2);
+
+  // The quotes are only stripped if the field actually required them.
+  if (inner.find_first_of("\"\r\n,") == std::string::npos) {
+    return Value(subject);
+  }
+
+  return Value(utils::StringUtils::replaceMap(inner, {{"\"\"", "\""}}));
+}
+
#ifdef EXPRESSION_LANGUAGE_USE_REGEX
Value expr_replace(const std::vector<Value> &args) {
@@ -555,6 +606,10 @@ Expression make_dynamic_function(const std::string &function_name,
return make_dynamic_function_incomplete<expr_escapeXml>(function_name, args, 0);
} else if (function_name == "unescapeXml") {
return make_dynamic_function_incomplete<expr_unescapeXml>(function_name, args, 0);
+ } else if (function_name == "escapeCsv") {
+ return make_dynamic_function_incomplete<expr_escapeCsv>(function_name, args, 0);
+ } else if (function_name == "unescapeCsv") {
+ return make_dynamic_function_incomplete<expr_unescapeCsv>(function_name, args, 0);
#ifdef EXPRESSION_LANGUAGE_USE_REGEX
} else if (function_name == "replace") {
return make_dynamic_function_incomplete<expr_replace>(function_name, args, 2);
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/libminifi/include/utils/StringUtils.h
----------------------------------------------------------------------
diff --git a/libminifi/include/utils/StringUtils.h b/libminifi/include/utils/StringUtils.h
index 4bedce7..7f33260 100644
--- a/libminifi/include/utils/StringUtils.h
+++ b/libminifi/include/utils/StringUtils.h
@@ -210,15 +210,28 @@ class StringUtils {
}
+  /**
+   * Replaces every occurrence of each key of replace_map in source_string with the mapped value.
+   *
+   * All matches are located in the unmodified input before anything is rewritten, so the output of
+   * one replacement is never matched by another key (escaping both '"' and '&' in a single call
+   * does not re-escape the '&' of a freshly inserted "&quot;").
+   *
+   * Empty keys are ignored. When two matches overlap, the leftmost one is applied; for matches
+   * starting at the same position, the longest one is applied.
+   *
+   * @param source_string text to process
+   * @param replace_map search text mapped to its replacement
+   * @return copy of source_string with the replacements applied
+   */
   static std::string replaceMap(std::string source_string, const std::map<std::string, std::string> &replace_map) {
+    // Each entry is (match position, (match length, replacement text)).
+    using Replacement = std::pair<size_t, std::pair<size_t, std::string>>;
+    std::vector<Replacement> replacements;
+
     for (const auto &replace_pair : replace_map) {
+      // find("") succeeds at every position without advancing, so the scan would never terminate.
+      if (replace_pair.first.empty()) {
+        continue;
+      }
+
       size_t replace_pos = 0;
       while ((replace_pos = source_string.find(replace_pair.first, replace_pos)) != std::string::npos) {
-        source_string.replace(replace_pos, replace_pair.first.length(), replace_pair.second);
-        replace_pos += replace_pair.second.length();
+        replacements.emplace_back(replace_pos, std::make_pair(replace_pair.first.length(), replace_pair.second));
+        replace_pos += replace_pair.first.length();
       }
     }
-    return source_string;
+
+    // Leftmost match first; at the same position the longest match first.
+    std::sort(replacements.begin(), replacements.end(), [](const Replacement &a, const Replacement &b) {
+      if (a.first != b.first) {
+        return a.first < b.first;
+      }
+      return a.second.first > b.second.first;
+    });
+
+    // Assemble the output in one pass: untouched text up to each match, then its replacement.
+    std::string result_string;
+    size_t copied_to = 0;
+    for (const auto &replacement : replacements) {
+      if (replacement.first < copied_to) {
+        continue;  // overlaps a match that has already been applied
+      }
+      result_string.append(source_string, copied_to, replacement.first - copied_to);
+      result_string.append(replacement.second.second);
+      copied_to = replacement.first + replacement.second.first;
+    }
+    result_string.append(source_string, copied_to, std::string::npos);
+
+    return result_string;
   }
};
http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/253a1b74/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
----------------------------------------------------------------------
diff --git a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
index a198335..12c60ca 100644
--- a/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
+++ b/libminifi/test/expression-language-tests/ExpressionLanguageTests.cpp
@@ -1046,14 +1046,14 @@ TEST_CASE("Encode XML", "[expressionEncodeXML]") { // NOLINT
auto flow_file_a = std::make_shared<MockFlowFile>();
flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'");
- REQUIRE("Zero > One < &quot;two!&quot; & 'true'" == expr({flow_file_a}).asString());
+ REQUIRE("Zero > One < "two!" & 'true'" == expr({flow_file_a}).asString());
}
+// The input carries exactly one level of XML escaping; unescapeXml must return the plain text.
 TEST_CASE("Decode XML", "[expressionDecodeXML]") { // NOLINT
   auto expr = expression::compile("${message:unescapeXml()}");
   auto flow_file_a = std::make_shared<MockFlowFile>();
-  flow_file_a->addAttribute("message", "Zero &gt; One &lt; &amp;quot;two!&amp;quot; &amp; &apos;true&apos;");
+  flow_file_a->addAttribute("message", "Zero &gt; One &lt; &quot;two!&quot; &amp; &apos;true&apos;");
   REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString());
 }
@@ -1064,3 +1064,35 @@ TEST_CASE("Encode Decode XML", "[expressionEncodeDecodeXML]") { // NOLINT
flow_file_a->addAttribute("message", "Zero > One < \"two!\" & 'true'");
REQUIRE("Zero > One < \"two!\" & 'true'" == expr({flow_file_a}).asString());
}
+
+// A value containing double quotes must come back wrapped in quotes with every embedded quote doubled.
+TEST_CASE("Encode CSV", "[expressionEncodeCSV]") { // NOLINT
+  auto flow_file = std::make_shared<MockFlowFile>();
+  flow_file->addAttribute("message", "Zero > One < \"two!\" & 'true'");
+
+  auto escape_csv = expression::compile("${message:escapeCsv()}");
+  const auto escaped = escape_csv({flow_file}).asString();
+
+  REQUIRE("\"Zero > One < \"\"two!\"\" & 'true'\"" == escaped);
+}
+
+// A quoted field with doubled inner quotes must lose the enclosing quotes and have the inner quotes collapsed.
+TEST_CASE("Decode CSV", "[expressionDecodeCSV]") { // NOLINT
+  auto flow_file = std::make_shared<MockFlowFile>();
+  flow_file->addAttribute("message", R"("Zero > One < ""two!"" & 'true'")");
+
+  auto unescape_csv = expression::compile("${message:unescapeCsv()}");
+  const auto unescaped = unescape_csv({flow_file}).asString();
+
+  REQUIRE("Zero > One < \"two!\" & 'true'" == unescaped);
+}
+
+// A quoted value whose content has no quote, comma, CR or LF did not need quoting, so it is left untouched.
+TEST_CASE("Decode CSV 2", "[expressionDecodeCSV2]") { // NOLINT
+  auto flow_file = std::make_shared<MockFlowFile>();
+  flow_file->addAttribute("message", R"("quoted")");
+
+  auto unescape_csv = expression::compile("${message:unescapeCsv()}");
+  const auto unescaped = unescape_csv({flow_file}).asString();
+
+  REQUIRE("\"quoted\"" == unescaped);
+}
+
+// Escaping followed by unescaping must give back the original attribute value.
+TEST_CASE("Encode Decode CSV", "[expressionEncodeDecodeCSV]") { // NOLINT
+  auto flow_file = std::make_shared<MockFlowFile>();
+  flow_file->addAttribute("message", "Zero > One < \"two!\" & 'true'");
+
+  auto round_trip = expression::compile("${message:escapeCsv():unescapeCsv()}");
+  const auto restored = round_trip({flow_file}).asString();
+
+  REQUIRE("Zero > One < \"two!\" & 'true'" == restored);
+}