You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by mo...@apache.org on 2018/06/06 17:41:41 UTC

nifi git commit: NIFI-4272 support multiple captures when EL is present in replacement value This closes #2748

Repository: nifi
Updated Branches:
  refs/heads/master eedf1237a -> f7f809c3d


NIFI-4272 support multiple captures when EL is present in replacement value
This closes #2748

Signed-off-by: Mike Moser <mo...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/f7f809c3
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/f7f809c3
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/f7f809c3

Branch: refs/heads/master
Commit: f7f809c3d3632eea5234b31740984b73de322464
Parents: eedf123
Author: Otto Fowler <ot...@gmail.com>
Authored: Wed May 30 16:53:55 2018 -0400
Committer: Mike Moser <mo...@apache.org>
Committed: Wed Jun 6 17:23:20 2018 +0000

----------------------------------------------------------------------
 .../nifi/processors/standard/ReplaceText.java   | 110 +++++++++++++++----
 .../processors/standard/TestReplaceText.java    |  62 ++++++++++-
 2 files changed, 150 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/f7f809c3/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
index de17213..f303796 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ReplaceText.java
@@ -18,16 +18,17 @@ package org.apache.nifi.processors.standard;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.nifi.annotation.behavior.EventDriven;
-import org.apache.nifi.annotation.behavior.SystemResourceConsideration;
 import org.apache.nifi.annotation.behavior.InputRequirement;
 import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
 import org.apache.nifi.annotation.behavior.SideEffectFree;
 import org.apache.nifi.annotation.behavior.SupportsBatching;
 import org.apache.nifi.annotation.behavior.SystemResource;
+import org.apache.nifi.annotation.behavior.SystemResourceConsideration;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.components.AllowableValue;
 import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.components.PropertyValue;
 import org.apache.nifi.components.ValidationContext;
 import org.apache.nifi.components.ValidationResult;
 import org.apache.nifi.components.Validator;
@@ -79,7 +80,9 @@ import java.util.regex.Pattern;
 @SystemResourceConsideration(resource = SystemResource.MEMORY)
 public class ReplaceText extends AbstractProcessor {
 
-    private static Pattern REPLACEMENT_NORMALIZATION_PATTERN = Pattern.compile("(\\$\\D)");
+    private static Pattern QUOTED_GROUP_REF_PATTERN = Pattern.compile("\\$\\{\\s*?'\\$\\d+?'.+?\\}");
+    private static Pattern DOUBLE_QUOTED_GROUP_REF_PATTERN = Pattern.compile("\\$\\{\\s*?\"\\$\\d+?\".+?\\}");
+    private static Pattern LITERAL_QUOTED_PATTERN = Pattern.compile("literal\\(('.*?')\\)",Pattern.DOTALL);
 
     // Constants
     public static final String LINE_BY_LINE = "Line-by-Line";
@@ -301,12 +304,8 @@ public class ReplaceText extends AbstractProcessor {
 
     // If we find a back reference that is not valid, then we will treat it as a literal string. For example, if we have 3 capturing
     // groups and the Replacement Value has the value is "I owe $8 to him", then we want to treat the $8 as a literal "$8", rather
-    // than attempting to use it as a back reference.
+    // than attempting to use it as a back reference.  We do this even if there are no capture groups.
     private static String escapeLiteralBackReferences(final String unescaped, final int numCapturingGroups) {
-        if (numCapturingGroups == 0) {
-            return unescaped;
-        }
-
         String value = unescaped;
         final Matcher backRefMatcher = unescapedBackReferencePattern.matcher(value); // consider unescaped back references
         while (backRefMatcher.find()) {
@@ -542,12 +541,18 @@ public class ReplaceText extends AbstractProcessor {
                         additionalAttrs.put("$" + i, groupValue);
                     }
 
-                    String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, additionalAttrs, escapeBackRefDecorator).getValue();
+                    // prepare the string and do the regex replace first
+                    // then evaluate the EL on the result
+                    String replacement = context.getProperty(REPLACEMENT_VALUE).getValue();
                     replacement = escapeLiteralBackReferences(replacement, numCapturingGroups);
+                    replacement = escapeExpressionDollarSigns(replacement);
+                    replacement = wrapLiterals(replacement);
+                    replacement = contentString.replaceAll(searchRegex, replacement);
+                    replacement = escapeForEvaluation(replacement);
 
-                    String replacementFinal = normalizeReplacementString(replacement);
+                    PropertyValue tempValue =  context.newPropertyValue(replacement);
+                    final String updatedValue = tempValue.evaluateAttributeExpressions(flowFile, additionalAttrs, null).getValue();
 
-                    final String updatedValue = contentString.replaceAll(searchRegex, replacementFinal);
                     updatedFlowFile = session.write(flowFile, new OutputStreamCallback() {
                         @Override
                         public void process(final OutputStream out) throws IOException {
@@ -574,12 +579,17 @@ public class ReplaceText extends AbstractProcessor {
                                         additionalAttrs.put("$" + i, groupValue);
                                     }
 
-                                    String replacement = context.getProperty(REPLACEMENT_VALUE).evaluateAttributeExpressions(flowFile, additionalAttrs, escapeBackRefDecorator).getValue();
+                                    // prepare the string and do the regex replace first
+                                    // then evaluate the EL on the result
+                                    String replacement = context.getProperty(REPLACEMENT_VALUE).getValue();
                                     replacement = escapeLiteralBackReferences(replacement, numCapturingGroups);
+                                    replacement = escapeExpressionDollarSigns(replacement);
+                                    replacement = wrapLiterals(replacement);
+                                    replacement = oneLine.replaceAll(searchRegex, replacement);
+                                    replacement = escapeForEvaluation(replacement);
 
-                                    String replacementFinal = normalizeReplacementString(replacement);
-
-                                    final String updatedValue = oneLine.replaceAll(searchRegex, replacementFinal);
+                                    PropertyValue tempValue =  context.newPropertyValue(replacement);
+                                    final String updatedValue = tempValue.evaluateAttributeExpressions(flowFile, additionalAttrs, null).getValue();
                                     bw.write(updatedValue);
                                 } else {
                                     // No match. Just write out the line as it was.
@@ -659,16 +669,76 @@ public class ReplaceText extends AbstractProcessor {
     }
 
     /**
+     * Wraps quoted group references such as '$1' or "$1" with the {@code literal} function for EL evaluation.
+     * @param possibleLiteral the {@code String} to evaluate.
+     * @return {@code String} with literals wrapped.  If no literals or Expression Language are present, the passed string
+     * is returned.
+     */
+    private static String wrapLiterals(String possibleLiteral) {
+        String replacementFinal = possibleLiteral;
+        if (!possibleLiteral.contains("${")) {
+            return possibleLiteral;
+        }
+
+        if (QUOTED_GROUP_REF_PATTERN.matcher(replacementFinal).find()) {
+            replacementFinal = replacementFinal.replaceAll("(\\$\\{\\s*?)('\\$\\d+?')(.*\\})", "$1literal($2)$3");
+        }
+
+        if (DOUBLE_QUOTED_GROUP_REF_PATTERN.matcher(replacementFinal).find()) {
+            replacementFinal = replacementFinal.replaceAll("(\\$\\{\\s*?)(\"\\$\\d+?\")(.*\\})", "$1literal($2)$3");
+        }
+
+        return replacementFinal;
+    }
+
+    /**
      * If we have a '$' followed by anything other than a number, then escape
-     * it. E.g., '$d' becomes '\$d' so that it can be used as a literal in a
+     * it if it is not already escaped. E.g., '$d' becomes '\$d' so that it can be used as a literal in a
      * regex.
      */
-    private static String normalizeReplacementString(String replacement) {
-        String replacementFinal = replacement;
-        if (REPLACEMENT_NORMALIZATION_PATTERN.matcher(replacement).find()) {
-            replacementFinal = Matcher.quoteReplacement(replacement);
+    private static String escapeExpressionDollarSigns(String replacement) {
+
+        // are there expressions or group references
+        if (replacement.indexOf('$') == -1) {
+            return replacement;
         }
-        return replacementFinal;
+        StringBuilder sb = new StringBuilder();
+        boolean lastWasEscape = false;
+        for (int i=0; i<replacement.length(); i++) {
+            char c = replacement.charAt(i);
+            if (c == '\\' ) {
+                lastWasEscape = true;
+            } else {
+                if ( c == '$') {
+                    if (!lastWasEscape && !Character.isDigit(replacement.charAt(i+1))) {
+                        sb.append('\\');
+                    }
+                }
+                lastWasEscape = false;
+            }
+            sb.append(c);
+        }
+       return sb.toString();
+    }
+
+    /**
+     * Escapes a {@code String} containing literal('') EL values.
+     * @param contentString the {@code String}
+     * @return the escaped {@code String}. If no literal() is present, then the input {@code String} will be returned
+     */
+    private static String escapeForEvaluation(String contentString) {
+        final Matcher matcher = LITERAL_QUOTED_PATTERN.matcher(contentString);
+        String returnString = contentString;
+        while(matcher.find()) {
+            for (int i = 1; i <= matcher.groupCount(); i ++) {
+                String replacement = matcher.group(i)
+                    .replaceAll("\\n","\\\\n")
+                    .replaceAll("\\r","\\\\r")
+                    .replaceAll("\\t","\\\\t");
+                returnString = new StringBuilder(returnString).replace(matcher.start(i),matcher.end(i),replacement).toString();
+            }
+        }
+        return returnString;
     }
 
     private interface ReplacementStrategyExecutor {

http://git-wip-us.apache.org/repos/asf/nifi/blob/f7f809c3/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
index 3755883..7505233 100644
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
+++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestReplaceText.java
@@ -79,9 +79,23 @@ public class TestReplaceText {
     }
 
     @Test
+    public void testEscapedEnough$InReplacementCanReturnEscaped$() throws IOException {
+        final TestRunner runner = getRunner();
+        runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)");
+        runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "a\\\\\\$b");
+
+        runner.enqueue("a$a,b,c,d");
+        runner.run();
+
+        runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+        final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+        out.assertContentEquals("a\\$b".getBytes("UTF-8"));
+    }
+
+    @Test
     public void testWithEscaped$InReplacement() throws IOException {
         final TestRunner runner = getRunner();
-        runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s:^.*$)");
+        runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)");
         runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "a\\$b");
 
         runner.enqueue("a$a,b,c,d");
@@ -89,7 +103,7 @@ public class TestReplaceText {
 
         runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
         final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
-        out.assertContentEquals("a\\$b".getBytes("UTF-8"));
+        out.assertContentEquals("a$b".getBytes("UTF-8"));
     }
 
     @Test
@@ -107,6 +121,34 @@ public class TestReplaceText {
     }
 
     @Test
+    public void testWithSingleQuotedELInReplacement() throws IOException {
+        final TestRunner runner = getRunner();
+        runner.setProperty(ReplaceText.SEARCH_VALUE, "\"([a-z]+)\":\"(\\w+)\"");
+        runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\"${'$1':toUpper()}\":\"$2\"");
+        runner.enqueue("{\"name\":\"Smith\",\"middle\":\"nifi\",\"firstname\":\"John\"}");
+        runner.run();
+
+        runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+        final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+        out.assertContentEquals("{\"NAME\":\"Smith\",\"MIDDLE\":\"nifi\",\"FIRSTNAME\":\"John\"}");
+
+    }
+
+    @Test
+    public void testWithDoubleQuotedELInReplacement() throws IOException {
+        final TestRunner runner = getRunner();
+        runner.setProperty(ReplaceText.SEARCH_VALUE, "\"([a-z]+)\":\"(\\w+)\"");
+        runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "\"${\"$1\":toUpper()}\":\"$2\"");
+        runner.enqueue("{\"name\":\"Smith\",\"middle\":\"nifi\",\"firstname\":\"John\"}");
+        runner.run();
+
+        runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+        final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+        out.assertContentEquals("{\"NAME\":\"Smith\",\"MIDDLE\":\"nifi\",\"FIRSTNAME\":\"John\"}");
+
+    }
+
+    @Test
     public void testPrependSimple() throws IOException {
         final TestRunner runner = getRunner();
         runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "TEST");
@@ -1100,6 +1142,22 @@ public class TestReplaceText {
     }
 
     @Test
+    public void testRegexWithELAndELSpecialChars() throws Exception {
+        final TestRunner runner = getRunner();
+        runner.setProperty(ReplaceText.SEARCH_VALUE, "(?s)(^.*$)");
+        runner.setProperty(ReplaceText.REPLACEMENT_VALUE, "${'$1':toUpper()}"); // will uppercase group with good Java regex
+        runner.setProperty(ReplaceText.REPLACEMENT_STRATEGY, ReplaceText.REGEX_REPLACE);
+        runner.setProperty(ReplaceText.EVALUATION_MODE, ReplaceText.ENTIRE_TEXT);
+
+        runner.enqueue("testing\n\t\r123".getBytes());
+        runner.run();
+
+        runner.assertAllFlowFilesTransferred(ReplaceText.REL_SUCCESS, 1);
+        final MockFlowFile out = runner.getFlowFilesForRelationship(ReplaceText.REL_SUCCESS).get(0);
+        out.assertContentEquals("TESTING\n\t\r123");
+    }
+
+    @Test
     public void testRegexNoCaptureDefaultReplacement() throws IOException {
         // Test the old Default Regex and new Default Regex with the default replacement.  This should fail
         // because the regex does not create a capture group.