You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by mc...@apache.org on 2015/10/30 15:29:34 UTC

[08/50] [abbrv] nifi git commit: NIFI-944 Added support and unit tests for escaped characters in ConvertCSVtoAvro processor properties. This closes #87. Reviewed by Tony Kurc

NIFI-944 Added support and unit tests for escaped characters in ConvertCSVtoAvro processor properties. This closes #87. Reviewed by Tony Kurc <tk...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/nifi/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi/commit/e68fdca5
Tree: http://git-wip-us.apache.org/repos/asf/nifi/tree/e68fdca5
Diff: http://git-wip-us.apache.org/repos/asf/nifi/diff/e68fdca5

Branch: refs/heads/NIFI-655
Commit: e68fdca517eac53700c0f38bbaa7a893cfc28d9c
Parents: 518670d
Author: Joe <jo...@impresstv.com>
Authored: Tue Oct 20 23:48:54 2015 -0400
Committer: Tony Kurc <tr...@gmail.com>
Committed: Wed Oct 21 00:01:38 2015 -0400

----------------------------------------------------------------------
 .../nifi/processors/kite/ConvertCSVToAvro.java  | 17 ++++++++-
 .../processors/kite/TestCSVToAvroProcessor.java | 40 ++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi/blob/e68fdca5/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java
index 6c20a8f..ea84daa 100644
--- a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java
+++ b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/main/java/org/apache/nifi/processors/kite/ConvertCSVToAvro.java
@@ -30,6 +30,7 @@ import org.apache.avro.Schema;
 import org.apache.avro.file.CodecFactory;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericData.Record;
+import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
 import org.apache.nifi.annotation.documentation.Tags;
 import org.apache.nifi.annotation.lifecycle.OnScheduled;
@@ -53,6 +54,7 @@ import org.kitesdk.data.spi.DefaultConfiguration;
 import org.kitesdk.data.spi.filesystem.CSVFileReader;
 import org.kitesdk.data.spi.filesystem.CSVProperties;
 
+
 import static org.apache.nifi.processor.util.StandardValidators.createLongValidator;
 
 @Tags({"kite", "csv", "avro"})
@@ -66,11 +68,15 @@ public class ConvertCSVToAvro extends AbstractKiteProcessor {
         @Override
         public ValidationResult validate(String subject, String input,
                 ValidationContext context) {
+            // Allows special, escaped characters as input; the input is then unescaped and must resolve to a single character.
+            // Examples of special characters: \t (or \u0009), \f.
+            input = unescapeString(input);
+
             return new ValidationResult.Builder()
                     .subject(subject)
                     .input(input)
-                    .explanation("Only single characters are supported")
-                    .valid(input.length() == 1)
+                    .explanation("Only non-null single characters are supported")
+                    .valid(input.length() == 1 && input.charAt(0) != 0)
                     .build();
         }
     };
@@ -295,4 +301,11 @@ public class ConvertCSVToAvro extends AbstractKiteProcessor {
             session.transfer(incomingCSV, FAILURE);
         }
     }
+
+    private static String unescapeString(String input) {
+        if (input.length() > 1) {
+            input = StringEscapeUtils.unescapeJava(input);
+        }
+        return input;
+    }
 }

http://git-wip-us.apache.org/repos/asf/nifi/blob/e68fdca5/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java
----------------------------------------------------------------------
diff --git a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java
index 43dea6e..0cde23c 100644
--- a/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java
+++ b/nifi-nar-bundles/nifi-kite-bundle/nifi-kite-processors/src/test/java/org/apache/nifi/processors/kite/TestCSVToAvroProcessor.java
@@ -48,9 +48,49 @@ public class TestCSVToAvroProcessor {
     public static final String FAILURE_CONTENT = ""
             + ",blue,\n"; // invalid, ID is missing
 
+    public static final String TSV_CONTENT = ""
+            + "1\tgreen\n"
+            + "\tblue\t\n" + // invalid, ID is missing
+            "2\tgrey\t12.95";
+
     public static final String FAILURE_SUMMARY = "" +
             "Field id: cannot make \"long\" value: '': Field id type:LONG pos:0 not set and has no default value";
 
+    /**
+     * Basic test for tab-separated files, similar to {@link #testBasicConversion()}.
+     * @throws IOException
+     */
+    @Test
+    public void testTabSeparatedConversion() throws IOException {
+        TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);
+        runner.assertNotValid();
+        runner.setProperty(ConvertCSVToAvro.SCHEMA, SCHEMA.toString());
+        runner.setProperty(ConvertCSVToAvro.DELIMITER, "\\t");
+        runner.assertValid();
+
+        runner.enqueue(streamFor(TSV_CONTENT));
+        runner.run();
+
+        long converted = runner.getCounterValue("Converted records");
+        long errors = runner.getCounterValue("Conversion errors");
+        Assert.assertEquals("Should convert 2 rows", 2, converted);
+        Assert.assertEquals("Should reject 1 row", 1, errors);
+
+        runner.assertTransferCount("success", 1);
+        runner.assertTransferCount("failure", 0);
+        runner.assertTransferCount("incompatible", 1);
+
+        MockFlowFile incompatible = runner.getFlowFilesForRelationship("incompatible").get(0);
+        String failureContent = new String(runner.getContentAsByteArray(incompatible),
+                StandardCharsets.UTF_8);
+
+        Assert.assertEquals("Should reject an invalid string and double",
+                TSV_CONTENT, failureContent);
+        Assert.assertEquals("Should accumulate error messages",
+                FAILURE_SUMMARY, incompatible.getAttribute("errors"));
+    }
+
+
     @Test
     public void testBasicConversion() throws IOException {
         TestRunner runner = TestRunners.newTestRunner(ConvertCSVToAvro.class);