You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by ma...@apache.org on 2022/04/08 17:21:19 UTC
[nifi] branch main updated: NIFI-9884 - JacksonCSVRecordReader ignores specified encoding
This is an automated email from the ASF dual-hosted git repository.
mattyb149 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new 2c83149c6d NIFI-9884 - JacksonCSVRecordReader ignores specified encoding
2c83149c6d is described below
commit 2c83149c6d387ef022d7f59bd35ebda51f502b37
Author: Paul Grey <gr...@yahoo.com>
AuthorDate: Wed Apr 6 12:46:06 2022 -0400
NIFI-9884 - JacksonCSVRecordReader ignores specified encoding
NIFI-9884 - JacksonCSVRecordReader ignores specified encoding; test case for ISO-8859-1
Signed-off-by: Matthew Burgess <ma...@apache.org>
This closes #5941
---
.../apache/nifi/csv/JacksonCSVRecordReader.java | 2 +-
.../nifi/csv/TestJacksonCSVRecordReader.java | 27 +++++++++++++++++++++-
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java
index f3c3acce8e..d9c1f8d99b 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/JacksonCSVRecordReader.java
@@ -59,7 +59,7 @@ public class JacksonCSVRecordReader extends AbstractCSVRecordReader {
final String dateFormat, final String timeFormat, final String timestampFormat, final String encoding) throws IOException {
super(logger, schema, hasHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat);
- final Reader reader = new InputStreamReader(new BOMInputStream(in));
+ final Reader reader = new InputStreamReader(new BOMInputStream(in), encoding);
CsvSchema.Builder csvSchemaBuilder = CsvSchema.builder()
.setColumnSeparator(csvFormat.getDelimiter())
diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java
index 8b7787f9f8..422b4496f3 100644
--- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java
+++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestJacksonCSVRecordReader.java
@@ -34,6 +34,7 @@ import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -69,7 +70,7 @@ public class TestJacksonCSVRecordReader {
fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
final RecordSchema schema = new SimpleRecordSchema(fields);
- try (final InputStream bais = new ByteArrayInputStream(text.getBytes());
+ try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8));
final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(), "UTF-8")) {
@@ -80,6 +81,30 @@ public class TestJacksonCSVRecordReader {
}
}
+ @Test
+ public void testISO8859() throws IOException, MalformedRecordException {
+ final String text = "name\nÄËÖÜ";
+ final byte[] bytesUTF = text.getBytes(StandardCharsets.UTF_8);
+ final byte[] bytes8859 = text.getBytes(StandardCharsets.ISO_8859_1);
+ assertEquals(13, bytesUTF.length, "expected size=13 for UTF-8 representation of test data");
+ assertEquals(9, bytes8859.length, "expected size=9 for ISO-8859-1 representation of test data");
+
+ final List<RecordField> fields = new ArrayList<>();
+ fields.add(new RecordField("name", RecordFieldType.STRING.getDataType()));
+ final RecordSchema schema = new SimpleRecordSchema(fields);
+
+ try (final InputStream bais = new ByteArrayInputStream(text.getBytes(StandardCharsets.ISO_8859_1));
+ final JacksonCSVRecordReader reader = new JacksonCSVRecordReader(bais, Mockito.mock(ComponentLog.class), schema, format, true, false,
+ RecordFieldType.DATE.getDefaultFormat(), RecordFieldType.TIME.getDefaultFormat(), RecordFieldType.TIMESTAMP.getDefaultFormat(),
+ StandardCharsets.ISO_8859_1.name())) {
+
+ final Record record = reader.nextRecord();
+ final String name = (String)record.getValue("name");
+
+ assertEquals("ÄËÖÜ", name);
+ }
+ }
+
@Test
public void testDate() throws IOException, MalformedRecordException {
final String dateValue = "1983-11-30";