You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by jo...@apache.org on 2022/04/11 14:36:14 UTC

[nifi] 02/04: NIFI-9884 - JacksonCSVRecordReader ignores specified encoding

This is an automated email from the ASF dual-hosted git repository.

joewitt pushed a commit to branch support/nifi-1.16
in repository https://gitbox.apache.org/repos/asf/nifi.git

commit 51efe1f00fbfb2adf7d6721e1697a92e3bf56cb8
Author: Paul Grey <gr...@yahoo.com>
AuthorDate: Wed Apr 6 12:46:06 2022 -0400

    NIFI-9884 - JacksonCSVRecordReader ignores specified encoding
    
    NIFI-9884 - JacksonCSVRecordReader ignores specified encoding; test case for ISO-8859-1
    
    Signed-off-by: Matthew Burgess <ma...@apache.org>
    
    This closes #5941
---
 .../apache/nifi/csv/JacksonCSVRecordReader.java    |  2 +-
 .../nifi/csv/TestJacksonCSVRecordReader.java       | 27 +++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java
index f3c3acce8e..d9c1f8d99b 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java
@@ -59,7 +59,7 @@ public class JacksonCSVRecordReader extends AbstractCSVRecordReader {
                                   final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
         super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);
 
-        final Reader reader = new InputStreamReader(new BOMInputStream(in));
+        final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding);
 
         CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder()
                 .setColumnSeparator(csvFormat.getDelimiter())
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java
index 8b7787f9f8..422b4496f3 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java
@@ -34,6 +34,7 @@ import java.io.ByteArrayInputStream;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -69,7 +70,7 @@ public class TestJacksonCSVRecordReader {
         fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
         final RecordSchema schema = new SimpleRecordSchema(fields);
 
-        try (final InputStream bais = new ByteArrayInputStream(text.getBytes());
+        try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
              final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
                      RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
 
@@ -80,6 +81,30 @@ public class TestJacksonCSVRecordReader {
         }
     }
 
+    @Test
+    public void testISO8859() throws IOException, MalformedRecordException {
+        final String text = "name\nÄËÖÜ";
+        final byte[] bytesUTF = text.getBytes(StandardCharsets.UTF_8);
+        final byte[] bytes8859 = text.getBytes(StandardCharsets.ISO_8859_1);
+        assertEquals(13, bytesUTF.length, "expected size=13 for UTF-8 representation of test data");
+        assertEquals(9, bytes8859.length, "expected size=9 for ISO-8859-1 representation of test data");
+
+        final List<RecordField> fields = new ArrayList<>();
+        fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
+        final RecordSchema schema = new SimpleRecordSchema(fields);
+
+        try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.ISO_8859_1));
+             final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
+                     RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(),
+                     StandardCharsets.ISO_8859_1.name())) {
+
+            final Record record = reader.nextRecord();
+            final String name = (String)record.getValue("name");
+
+            assertEquals("ÄËÖÜ", name);
+        }
+    }
+
     @Test
     public void testDate() throws IOException, MalformedRecordException {
         final String dateValue = "1983-11-30";