You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by mo...@apache.org on 2018/06/06 17:41:41 UTC
nifi git commit: NIFI-4272 support multiple captures when EL is
present in replacement value This closes #2748
Repository: nifi
Updated Branches:
refs/heads/master eedf1237a -> f7f809c3d
NIFI-4272 support multiple captures when EL is present in replacement value
This closes #2748
Signed-off-by: Mike Moser <mo...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/f7f809c3
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/f7f809c3
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/f7f809c3
Branch: refs/heads/master
Commit: f7f809c3d3632eea5234b31740984b73de322464
Parents: eedf123
Author: Otto Fowler <ot...@gmail.com>
Authored: Wed May 30 16:53:55 2018 -0400
Committer: Mike Moser <mo...@apache.org>
Committed: Wed Jun 6 17:23:20 2018 +0000
----------------------------------------------------------------------
.../nifi/processors/standard/ReplaceText.java | 110 +++++++++++++++----
.../processors/standard/TestReplaceText.java | 62 ++++++++++-
2 files changed, 150 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nifi/blob/f7f809c3/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
index de17213..f303796 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
@@ -18,16 +18,17 @@ package org.apache.nifi.processors.standard;
import org.apache.commons.io.IOUtils;
import org.apache.nifi.annotation.behavior.EventDriven;
-import org.apache.nifi.annotation.behavior.SystemResourceConsideration;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
import org.apache.nifi.annotation.behavior.SideEffectFree;
import org.apache.nifi.annotation.behavior.SupportsBatching;
import org.apache.nifi.annotation.behavior.SystemResource;
+import org.apache.nifi.annotation.behavior.SystemResourceConsideration;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.PropertyValue;
import org.apache.nifi.components.ValidationContext;
import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.components.Validator;
@@ -79,7 +80,9 @@ import java.util.regex.Pattern;
@SystemResourceConsideration(resource = SystemResource.MEMORY)
public class ReplaceText extends AbstractProcessor {
- private static Pattern REPLACEMENT_NORMALIZATION_PATTERN = Pattern.compile("(\\$\\D)");
+ private static final Pattern QUOTED_GROUP_REF_PATTERN = Pattern.compile("\\$\\{\\s*?'\\$\\d+?'.+?\\}"); // "${", optional whitespace, a single-quoted $<digits>, at least one more character, then "}"
+ private static final Pattern DOUBLE_QUOTED_GROUP_REF_PATTERN = Pattern.compile("\\$\\{\\s*?\"\\$\\d+?\".+?\\}"); // same shape with a double-quoted $<digits>
+ private static final Pattern LITERAL_QUOTED_PATTERN = Pattern.compile("literal\\(('.*?')\\)",Pattern.DOTALL); // group 1 is the single-quoted argument of literal(...); DOTALL lets it span line breaks
// Constants
public static final String LINE_BY_LINE = "Line-by-Line";
@@ -301,12 +304,8 @@ public class ReplaceText extends AbstractProcessor {
// If we find a back reference that is not valid, then we will treat it as a literal string. For example, if we have 3 capturing
// groups and the Replacement Value has the value is "I owe $8 to him", then we want to treat the $8 as a literal "$8", rather
- // than attempting to use it as a back reference.
+ // than attempting to use it as a back reference. We do this even if there are no capture groups.
private static String escapeLiteralBackReferences(final String unescaped, final int numCapturingGroups) {
- if (numCapturingGroups == 0) {
- return unescaped;
- }
-
String value = unescaped;
final Matcher backRefMatcher = unescapedBackReferencePattern.matcher(value); // consider unescaped back references
while (backRefMatcher.find()) {
@@ -542,12 +541,18 @@ public class ReplaceText extends AbstractProcessor {
additionalAttrs.put("$" + i, groupValue);
}
- String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, additionalAttrs, escapeBackRefDecorator).getValue();
+ // prepare the string and do the regex replace first
+ // then evaluate the EL on the result
+ String replacement = context.getProperty(REPLACEMENT_VALUE).getValue();
replacement = escapeLiteralBackReferences(replacement, numCapturingGroups);
+ replacement = escapeExpressionDollarSigns(replacement);
+ replacement = wrapLiterals(replacement);
+ replacement = contentString.replaceAll(searchRegex, replacement);
+ replacement = escapeForEvaluation(replacement);
- String replacementFinal = normalizeReplacementString(replacement);
+ PropertyValue tempValue = context.newPropertyValue(replacement);
+ final String updatedValue = tempValue.evaluateAttributeExpressions(flowFile, additionalAttrs, null).getValue();
- final String updatedValue = contentString.replaceAll(searchRegex, replacementFinal);
updatedFlowFile = session.write(flowFile, new OutputStreamCallback() {
@Override
public void process(final OutputStream out) throws IOException {
@@ -574,12 +579,17 @@ public class ReplaceText extends AbstractProcessor {
additionalAttrs.put("$" + i, groupValue);
}
- String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, additionalAttrs, escapeBackRefDecorator).getValue();
+ // prepare the string and do the regex replace first
+ // then evaluate the EL on the result
+ String replacement = context.getProperty(REPLACEMENT_VALUE).getValue();
replacement = escapeLiteralBackReferences(replacement, numCapturingGroups);
+ replacement = escapeExpressionDollarSigns(replacement);
+ replacement = wrapLiterals(replacement);
+ replacement = oneLine.replaceAll(searchRegex, replacement);
+ replacement = escapeForEvaluation(replacement);
- String replacementFinal = normalizeReplacementString(replacement);
-
- final String updatedValue = oneLine.replaceAll(searchRegex, replacementFinal);
+ PropertyValue tempValue = context.newPropertyValue(replacement);
+ final String updatedValue = tempValue.evaluateAttributeExpressions(flowFile, additionalAttrs, null).getValue();
bw.write(updatedValue);
} else {
// No match. Just write out the line as it was.
@@ -659,16 +669,76 @@ public class ReplaceText extends AbstractProcessor {
}
/**
+ * Wraps quoted group references such as '$1' or "$1" with the {@code literal} function for EL evaluation.
+ * @param possibleLiteral the {@code String} to evaluate.
+ * @return {@code String} with literals wrapped. If no literals or Expression Language present the passed string
+ * is returned.
+ */
+ private static String wrapLiterals(String possibleLiteral) {
+ String replacementFinal = possibleLiteral;
+ if (!possibleLiteral.contains("${")) {
+ return possibleLiteral;
+ }
+
+ if (QUOTED_GROUP_REF_PATTERN.matcher(replacementFinal).find()) { // lazy tail: stop at the first '}' so every '$n' on a line is wrapped, not only the first
+ replacementFinal = replacementFinal.replaceAll("(\\$\\{\\s*?)('\\$\\d+?')(.*?\\})", "$1literal($2)$3");
+ }
+
+ if (DOUBLE_QUOTED_GROUP_REF_PATTERN.matcher(replacementFinal).find()) { // same treatment for double-quoted "$n"
+ replacementFinal = replacementFinal.replaceAll("(\\$\\{\\s*?)(\"\\$\\d+?\")(.*?\\})", "$1literal($2)$3");
+ }
+
+ return replacementFinal;
+ }
+
+ /**
* If we have a '$' followed by anything other than a number, then escape
- * it. E.g., '$d' becomes '\$d' so that it can be used as a literal in a
+ * it if it is not already escaped; a '$' at the very end is escaped as well. E.g., '$d' becomes '\$d' so that it can be used as a literal in a
* regex.
*/
- private static String normalizeReplacementString(String replacement) {
- String replacementFinal = replacement;
- if (REPLACEMENT_NORMALIZATION_PATTERN.matcher(replacement).find()) {
- replacementFinal = Matcher.quoteReplacement(replacement);
+ private static String escapeExpressionDollarSigns(String replacement) {
+
+ // nothing to escape when there are no expressions or group references
+ if (replacement.indexOf('$') == -1) {
+ return replacement;
}
- return replacementFinal;
+ StringBuilder sb = new StringBuilder();
+ boolean lastWasEscape = false; // true when the previous character was a backslash
+ for (int i=0; i<replacement.length(); i++) {
+ char c = replacement.charAt(i);
+ if (c == '\\' ) {
+ lastWasEscape = true;
+ } else {
+ if ( c == '$') {
+ if (!lastWasEscape && (i + 1 == replacement.length() || !Character.isDigit(replacement.charAt(i+1)))) { // bounds check: a trailing '$' has no next character
+ sb.append('\\');
+ }
+ }
+ lastWasEscape = false;
+ }
+ sb.append(c);
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Escapes newline, carriage return and tab characters inside the quoted argument of each literal('') EL call.
+ * @param contentString the {@code String}
+ * @return the escaped {@code String}. If no literal() is present, then a {@code String} equal to the input is returned
+ */
+ private static String escapeForEvaluation(String contentString) {
+ final Matcher matcher = LITERAL_QUOTED_PATTERN.matcher(contentString);
+ final StringBuilder escaped = new StringBuilder(contentString.length());
+ int copiedUpTo = 0; // end of the input region already copied; all offsets refer to contentString
+ while (matcher.find()) {
+ // copy the untouched text before the quoted argument, then the argument itself with \n, \r and \t
+ // spelled out; building a fresh buffer means a longer replacement cannot shift later match offsets
+ escaped.append(contentString, copiedUpTo, matcher.start(1));
+ escaped.append(matcher.group(1).replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t"));
+ copiedUpTo = matcher.end(1);
+ }
+ escaped.append(contentString, copiedUpTo, contentString.length());
+ return escaped.toString();
}
private interface ReplacementStrategyExecutor {
http://git-wip-us.apache.org/repos/asf/nifi/blob/f7f809c3/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
index 3755883..7505233 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
@@ -79,9 +79,23 @@ public class TestReplaceText {
}
@Test
+ public void testEscapedEnough$InReplacementCanReturnEscaped$() throws IOException { // a replacement with three backslashes before '$' should yield a literal backslash followed by '$'
+ final TestRunner runner = getRunner();
+ runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)"); // DOTALL flag plus one capture group around the whole content
+ runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "a\\\\\\$b"); // property text is: a, three backslashes, $, b
+
+ runner.enqueue("a$a,b,c,d");
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+ final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+ out.assertContentEquals("a\\$b".getBytes("UTF-8")); // expected content is: a, one backslash, $, b
+ }
+
+ @Test
public void testWithEscaped$InReplacement() throws IOException {
final TestRunner runner = getRunner();
- runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s:^.*$)");
+ runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)");
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "a\\$b");
runner.enqueue("a$a,b,c,d");
@@ -89,7 +103,7 @@ public class TestReplaceText {
runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
- out.assertContentEquals("a\\$b".getBytes("UTF-8"));
+ out.assertContentEquals("a$b".getBytes("UTF-8"));
}
@Test
@@ -107,6 +121,34 @@ public class TestReplaceText {
}
@Test
+ public void testWithSingleQuotedELInReplacement() throws IOException { // EL applied to a single-quoted group reference must work for every match in the content
+ final TestRunner runner = getRunner();
+ runner.setProperty(ReplaceText.SEARCH_VALUE, "\"([a-z]+)\":\"(\\w+)\""); // matches "key":"value"; group 1 is the key, group 2 the value
+ runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\"${'$1':toUpper()}\":\"$2\""); // toUpper() on group 1, group 2 copied unchanged
+ runner.enqueue("{\"name\":\"Smith\",\"middle\":\"nifi\",\"firstname\":\"John\"}"); // three key/value pairs, so three matches
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+ final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+ out.assertContentEquals("{\"NAME\":\"Smith\",\"MIDDLE\":\"nifi\",\"FIRSTNAME\":\"John\"}"); // all three keys upper-cased, values untouched
+
+ }
+
+ @Test
+ public void testWithDoubleQuotedELInReplacement() throws IOException { // same scenario as the single-quoted test, with the group reference in double quotes
+ final TestRunner runner = getRunner();
+ runner.setProperty(ReplaceText.SEARCH_VALUE, "\"([a-z]+)\":\"(\\w+)\""); // matches "key":"value"; group 1 is the key, group 2 the value
+ runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\"${\"$1\":toUpper()}\":\"$2\""); // toUpper() on double-quoted group 1, group 2 copied unchanged
+ runner.enqueue("{\"name\":\"Smith\",\"middle\":\"nifi\",\"firstname\":\"John\"}"); // three key/value pairs, so three matches
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+ final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+ out.assertContentEquals("{\"NAME\":\"Smith\",\"MIDDLE\":\"nifi\",\"FIRSTNAME\":\"John\"}"); // all three keys upper-cased, values untouched
+
+ }
+
+ @Test
public void testPrependSimple() throws IOException {
final TestRunner runner = getRunner();
runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST");
@@ -1100,6 +1142,22 @@ public class TestReplaceText {
}
@Test
+ public void testRegexWithELAndELSpecialChars() throws Exception { // captured text containing newline, tab and carriage return must survive EL evaluation
+ final TestRunner runner = getRunner();
+ runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)"); // DOTALL flag plus one capture group around the whole content
+ runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${'$1':toUpper()}"); // will uppercase group with good Java regex
+ runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.REGEX_REPLACE);
+ runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.ENTIRE_TEXT); // evaluate the content as a whole rather than line by line
+
+ runner.enqueue("testing\n\t\r123".getBytes()); // content embeds \n, \t and \r between the letters and the digits
+ runner.run();
+
+ runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+ final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+ out.assertContentEquals("TESTING\n\t\r123"); // letters upper-cased, control characters preserved as-is
+ }
+
+ @Test
public void testRegexNoCaptureDefaultReplacement() throws IOException {
// Test the old Default Regex and new Default Regex with the default replacement. This should fail
// because the regex does not create a capture group.