You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@nifi.apache.org by GitBox <gi...@apache.org> on 2019/01/03 04:24:51 UTC

[GitHub] bdesert closed pull request #3183: NIFI-5826 Fix to escaped backslash

bdesert closed pull request #3183: NIFI-5826 Fix to escaped backslash
URL: https://github.com/apache/nifi/pull/3183
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/ContainsRegex.java b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/ContainsRegex.java
index 02c3ca9fab..8952386496 100644
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/ContainsRegex.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/ContainsRegex.java
@@ -24,6 +24,7 @@
 import org.apache.nifi.record.path.RecordPathEvaluationContext;
 import org.apache.nifi.record.path.paths.LiteralValuePath;
 import org.apache.nifi.record.path.paths.RecordPathSegment;
+import org.apache.nifi.record.path.util.RecordPathUtils;
 import org.apache.nifi.serialization.record.util.DataTypeUtils;
 
 public class ContainsRegex extends FunctionFilter {
@@ -39,7 +40,7 @@ public ContainsRegex(RecordPathSegment recordPath, final RecordPathSegment regex
         if (regexPath instanceof LiteralValuePath) {
             final FieldValue fieldValue = ((LiteralValuePath) regexPath).evaluate((RecordPathEvaluationContext) null).findFirst().get();
             final Object value = fieldValue.getValue();
-            final String regex = DataTypeUtils.toString(value, (String) null);
+            final String regex = RecordPathUtils.unescapeBackslash(DataTypeUtils.toString(value, (String) null));
             compiledPattern = Pattern.compile(regex);
         } else {
             compiledPattern = null;
@@ -60,7 +61,7 @@ protected boolean test(final FieldValue fieldValue, final RecordPathEvaluationCo
                 return false;
             }
 
-            final String regex = DataTypeUtils.toString(value, (String) null);
+            final String regex = RecordPathUtils.unescapeBackslash(DataTypeUtils.toString(value, (String) null));
             pattern = Pattern.compile(regex);
         } else {
             pattern = compiledPattern;
diff --git a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/MatchesRegex.java b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/MatchesRegex.java
index a50f895a4e..a67818448d 100644
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/MatchesRegex.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/filter/MatchesRegex.java
@@ -24,6 +24,7 @@
 import org.apache.nifi.record.path.RecordPathEvaluationContext;
 import org.apache.nifi.record.path.paths.LiteralValuePath;
 import org.apache.nifi.record.path.paths.RecordPathSegment;
+import org.apache.nifi.record.path.util.RecordPathUtils;
 import org.apache.nifi.serialization.record.util.DataTypeUtils;
 
 public class MatchesRegex extends FunctionFilter {
@@ -39,7 +40,7 @@ public MatchesRegex(RecordPathSegment recordPath, final RecordPathSegment regexP
         if (regexPath instanceof LiteralValuePath) {
             final FieldValue fieldValue = ((LiteralValuePath) regexPath).evaluate((RecordPathEvaluationContext) null).findFirst().get();
             final Object value = fieldValue.getValue();
-            final String regex = DataTypeUtils.toString(value, (String) null);
+            final String regex = RecordPathUtils.unescapeBackslash(DataTypeUtils.toString(value, (String) null));
             compiledPattern = Pattern.compile(regex);
         } else {
             compiledPattern = null;
@@ -60,7 +61,7 @@ protected boolean test(final FieldValue fieldValue, final RecordPathEvaluationCo
                 return false;
             }
 
-            final String regex = DataTypeUtils.toString(value, (String) null);
+            final String regex = RecordPathUtils.unescapeBackslash(DataTypeUtils.toString(value, (String) null));
             pattern = Pattern.compile(regex);
         } else {
             pattern = compiledPattern;
diff --git a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ReplaceRegex.java b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ReplaceRegex.java
index 06f9f53c9d..9412bab8f5 100644
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ReplaceRegex.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ReplaceRegex.java
@@ -45,7 +45,8 @@ public ReplaceRegex(final RecordPathSegment recordPath, final RecordPathSegment
         if (searchValue instanceof LiteralValuePath) {
             final FieldValue fieldValue = ((LiteralValuePath) searchValue).evaluate((RecordPathEvaluationContext) null).findFirst().get();
             final Object value = fieldValue.getValue();
-            final String regex = DataTypeUtils.toString(value, (String) null);
+            final String regex = RecordPathUtils.unescapeBackslash(DataTypeUtils.toString(value, (String) null));
+
             compiledPattern = Pattern.compile(regex);
         } else {
             compiledPattern = null;
@@ -79,7 +80,7 @@ public ReplaceRegex(final RecordPathSegment recordPath, final RecordPathSegment
                         return fv;
                     }
 
-                    final String regex = DataTypeUtils.toString(fieldValue, (String) null);
+                    final String regex = RecordPathUtils.unescapeBackslash(DataTypeUtils.toString(fieldValue, (String) null));
                     pattern = Pattern.compile(regex);
                 } else {
                     pattern = compiledPattern;
diff --git a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/util/RecordPathUtils.java b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/util/RecordPathUtils.java
index e8056e49b8..9c5ab29cb4 100644
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/util/RecordPathUtils.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/util/RecordPathUtils.java
@@ -39,4 +39,52 @@ public static String getFirstStringValue(final RecordPathSegment segment, final
 
         return stringValue;
     }
+
+    /**
+     * Collapses backslash escape sequences in a value after the ANTLR parser has converted all backslashes into
+     * double ones, with the exception of \t, \r and \n (a null or empty value is returned unchanged). See
+     * <a href="file:../../../../../../../../../src/main/antlr3/org/apache/nifi/record/path/RecordPathParser.g">org/apache/nifi/record/path/RecordPathParser.g</a>
+     *
+     * @param value the string to transform; may be null or empty
+     * @return the value with \n and \r turned into line-break characters and \\ into \; other escapes keep their backslash
+     */
+    public static String unescapeBackslash(String value) {
+        if (value == null || value.isEmpty()) {
+            return value;
+        }
+        // resolve the character that follows each backslash
+        final StringBuilder sb = new StringBuilder();
+        boolean lastCharIsBackslash = false; // true while the previous char was a backslash not yet written out
+        for (int i = 0; i < value.length(); i++) {
+            final char c = value.charAt(i);
+
+            if (lastCharIsBackslash) {
+                switch (c) {
+                case 'n':
+                    sb.append("\n"); // backslash + n becomes an actual line feed character
+                    break;
+                case 'r':
+                    sb.append("\r"); // backslash + r becomes an actual carriage return character
+                    break;
+                case '\\':
+                    sb.append("\\"); // a doubled backslash collapses to a single backslash
+                    break;
+                case 't':
+                    sb.append("\\t"); // backslash + t is kept as the two-character sequence, not turned into a tab
+                    break;
+                default:
+                    sb.append("\\").append(c); // any other escape is passed through with its backslash
+                    break;
+                }
+
+                lastCharIsBackslash = false;
+            } else if (c == '\\') {
+                lastCharIsBackslash = true; // hold the backslash until the next char is seen; a trailing one is never appended
+            } else {
+                sb.append(c);
+            }
+        }
+
+        return sb.toString();
+    }
 }
diff --git a/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java b/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java
index 67c14e6475..d596cfad74 100644
--- a/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java
+++ b/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java
@@ -1008,12 +1008,18 @@ public void testReplaceRegex() {
         final List<RecordField> fields = new ArrayList<>();
         fields.add(new RecordField("id", RecordFieldType.INT.getDataType()));
         fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
+        fields.add(new RecordField("name1", RecordFieldType.STRING.getDataType()));
+        fields.add(new RecordField("name2", RecordFieldType.STRING.getDataType()));
+        fields.add(new RecordField("col1", RecordFieldType.STRING.getDataType()));
 
         final RecordSchema schema = new SimpleRecordSchema(fields);
 
         final Map<String, Object> values = new HashMap<>();
         values.put("id", 48);
         values.put("name", "John Doe");
+        values.put("name1", "John\\Doe");
+        values.put("name2", "John[]Doe");
+        values.put("col1", "tab \t, new line \n, CR \r, backslach \\");
         final Record record = new MapRecord(schema, values);
 
         assertEquals("ohn oe", RecordPath.compile("replaceRegex(/name, '[JD]', '')").evaluate(record).getSelectedFields().findFirst().get().getValue());
@@ -1026,6 +1032,32 @@ public void testReplaceRegex() {
         assertEquals("Jxohn Dxoe", RecordPath.compile("replaceRegex(/name, '(?<hello>[JD])', '${hello}x')").evaluate(record).getSelectedFields().findFirst().get().getValue());
 
         assertEquals("48ohn 48oe", RecordPath.compile("replaceRegex(/name, '(?<hello>[JD])', /id)").evaluate(record).getSelectedFields().findFirst().get().getValue());
+
+        assertEquals("John48Doe", RecordPath.compile("replaceRegex(/name1, '\\\\\\\\', /id)").evaluate(record).getSelectedFields().findFirst().get().getValue());
+        assertEquals("John48Doe", RecordPath.compile("replaceRegex(/name, '\\s', /id)").evaluate(record).getSelectedFields().findFirst().get().getValue());
+        assertEquals("John48Doe", RecordPath.compile("replaceRegex(/name, '\\\\s', /id)").evaluate(record).getSelectedFields().findFirst().get().getValue());
+        assertEquals("John  Doe", RecordPath.compile("replaceRegex(/name2, '[\\\\[\\\\]]', ' ')").evaluate(record).getSelectedFields().findFirst().get().getValue());
+        //test replace tab and new lines, and still keeps single backslash
+        assertEquals("tab *, new line *, CR *, backslach \\", RecordPath.compile("replaceRegex(/col1, '[\\t|\\r|\\n]', '*')").evaluate(record).getSelectedFields().findFirst().get().getValue());
+
+        boolean exceptionThrown = false;
+        try{
+            // illegal regex pattern: the double backslash is unescaped to a single one, and regex rules require a character after a backslash
+            RecordPath.compile("replaceRegex(/col1, '\\\\', '*')").evaluate(record).getSelectedFields().findFirst().get().getValue();
+        }catch(RecordPathException e) {
+            exceptionThrown = true;
+        }
+        assertTrue(exceptionThrown);
+
+        exceptionThrown = false;
+        try{
+            // illegal regex pattern: the single backslash will be replaced by ANTLR with a double backslash,
+            // which is then unescaped back to a single one, and regex rules require a character after a backslash
+            RecordPath.compile("replaceRegex(/col1, '\\', '*')").evaluate(record).getSelectedFields().findFirst().get().getValue();
+        }catch(RecordPathException e) {
+            exceptionThrown = true;
+        }
+        assertTrue(exceptionThrown);
     }
 
     @Test


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services