You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by ac...@apache.org on 2019/05/15 06:58:59 UTC
[camel] 01/02: CAMEL-13136: File consumer with charset doesn't parse XML
This is an automated email from the ASF dual-hosted git repository.
acosentino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel.git
commit 4344ab66b3d3256db3f097b7b553135592db4ea5
Author: Tadayoshi Sato <sa...@gmail.com>
AuthorDate: Tue May 14 20:42:20 2019 +0900
CAMEL-13136: File consumer with charset doesn't parse XML
---
.../camel/component/file/GenericFileConverter.java | 115 +--------------------
.../org/apache/camel/converter/IOConverter.java | 23 +----
.../component/file/FileSplitXPathCharsetTest.java | 84 +++++++++++++++
.../IOHelperCharsetTest.java} | 18 ++--
.../file/FileSplitXPathCharsetTest-input.xml | 6 ++
.../main/java/org/apache/camel/util/IOHelper.java | 19 ++++
6 files changed, 124 insertions(+), 141 deletions(-)
diff --git a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java
index b9cd8b8..706aaab 100644
--- a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java
+++ b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java
@@ -125,7 +125,7 @@ public final class GenericFileConverter {
} else {
LOG.debug("Read file {} (no charset)", f);
}
- return toInputStream(f, charset);
+ return IOHelper.toInputStream(f, charset);
}
}
if (exchange != null) {
@@ -143,7 +143,7 @@ public final class GenericFileConverter {
// use reader first as it supports the file charset
BufferedReader reader = genericFileToReader(file, exchange);
if (reader != null) {
- return toString(reader);
+ return IOHelper.toString(reader);
}
if (exchange != null) {
// otherwise ensure the body is loaded as we want the content of the body
@@ -184,120 +184,13 @@ public final class GenericFileConverter {
String charset = file.getCharset();
if (charset != null) {
LOG.debug("Read file {} with charset {}", f, file.getCharset());
- return toReader(f, charset);
+ return IOHelper.toReader(f, charset);
} else {
LOG.debug("Read file {} (no charset)", f);
- return toReader(f, ExchangeHelper.getCharsetName(exchange));
+ return IOHelper.toReader(f, ExchangeHelper.getCharsetName(exchange));
}
}
return null;
}
- private static BufferedReader toReader(File file, String charset) throws IOException {
- FileInputStream in = new FileInputStream(file);
- return IOHelper.buffered(new EncodingFileReader(in, charset));
- }
-
- private static InputStream toInputStream(File file, String charset) throws IOException {
- if (charset != null) {
- return new EncodingInputStream(file, charset);
- } else {
- return toInputStream(file);
- }
- }
-
- private static InputStream toInputStream(File file) throws IOException {
- return IOHelper.buffered(new FileInputStream(file));
- }
-
- private static String toString(BufferedReader reader) throws IOException {
- StringBuilder sb = new StringBuilder(1024);
- char[] buf = new char[1024];
- try {
- int len;
- // read until we reach then end which is the -1 marker
- while ((len = reader.read(buf)) != -1) {
- sb.append(buf, 0, len);
- }
- } finally {
- IOHelper.close(reader, "reader", LOG);
- }
-
- return sb.toString();
- }
-
- /**
- * Encoding-aware file reader.
- */
- private static class EncodingFileReader extends InputStreamReader {
-
- private final FileInputStream in;
-
- /**
- * @param in file to read
- * @param charset character set to use
- */
- EncodingFileReader(FileInputStream in, String charset)
- throws FileNotFoundException, UnsupportedEncodingException {
- super(in, charset);
- this.in = in;
- }
-
- @Override
- public void close() throws IOException {
- try {
- super.close();
- } finally {
- in.close();
- }
- }
- }
-
- /**
- * Encoding-aware input stream.
- */
- public static class EncodingInputStream extends InputStream {
-
- private final File file;
- private final BufferedReader reader;
- private final Charset defaultStreamCharset;
-
- private ByteBuffer bufferBytes;
- private CharBuffer bufferedChars = CharBuffer.allocate(4096);
-
- public EncodingInputStream(File file, String charset) throws IOException {
- this.file = file;
- reader = toReader(file, charset);
- defaultStreamCharset = defaultCharset.get();
- }
-
- @Override
- public int read() throws IOException {
- if (bufferBytes == null || bufferBytes.remaining() <= 0) {
- bufferedChars.clear();
- int len = reader.read(bufferedChars);
- bufferedChars.flip();
- if (len == -1) {
- return -1;
- }
- bufferBytes = defaultStreamCharset.encode(bufferedChars);
- }
- return bufferBytes.get();
- }
-
- @Override
- public void close() throws IOException {
- reader.close();
- }
-
- @Override
- public void reset() throws IOException {
- reader.reset();
- }
-
- public InputStream toOriginalInputStream() throws FileNotFoundException {
- return new FileInputStream(file);
- }
- }
-
}
diff --git a/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java b/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java
index 7bd8590..8f99cbe 100644
--- a/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java
+++ b/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java
@@ -73,21 +73,6 @@ public final class IOConverter {
return IOHelper.buffered(new FileInputStream(file));
}
- /**
- * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset
- *
- * @param file the file to be converted
- * @param charset the charset the file is read with
- * @return the input stream with the JVM default charset
- */
- public static InputStream toInputStream(File file, String charset) throws IOException {
- if (charset != null) {
- return new IOHelper.EncodingInputStream(file, charset);
- } else {
- return toInputStream(file);
- }
- }
-
@Converter
public static BufferedReader toReader(File file, Exchange exchange) throws IOException {
return IOHelper.toReader(file, ExchangeHelper.getCharsetName(exchange));
@@ -106,15 +91,11 @@ public final class IOConverter {
@Converter
public static BufferedWriter toWriter(File file, Exchange exchange) throws IOException {
FileOutputStream os = new FileOutputStream(file, false);
- return toWriter(os, ExchangeHelper.getCharsetName(exchange));
+ return IOHelper.toWriter(os, ExchangeHelper.getCharsetName(exchange));
}
public static BufferedWriter toWriter(File file, boolean append, String charset) throws IOException {
- return toWriter(new FileOutputStream(file, append), charset);
- }
-
- public static BufferedWriter toWriter(FileOutputStream os, String charset) throws IOException {
- return IOHelper.buffered(new IOHelper.EncodingFileWriter(os, charset));
+ return IOHelper.toWriter(new FileOutputStream(file, append), charset);
}
@Converter
diff --git a/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java b/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java
new file mode 100644
index 0000000..e61fe43
--- /dev/null
+++ b/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.file;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class FileSplitXPathCharsetTest extends ContextTestSupport {
+
+ private static final String TEST_DIR = "target/data/file-split-xpath-charset";
+
+ private static Path inputCsv = Paths.get(TEST_DIR, "input.csv");
+ private static Path inputXml = Paths.get(TEST_DIR, "input.xml");
+
+ @BeforeClass
+ public static void clearInputFiles() throws IOException {
+ deleteDirectory(TEST_DIR);
+ }
+
+ @Test
+ public void testCsv() throws Exception {
+ MockEndpoint out = getMockEndpoint("mock:result");
+ out.expectedMessageCount(3);
+ out.expectedBodiesReceived("abc", "xyz", "åäö");
+
+ Files.write(inputCsv, "abc,xyz,åäö".getBytes(StandardCharsets.ISO_8859_1));
+
+ out.assertIsSatisfied();
+ }
+
+ @Test
+ public void testXml() throws Exception {
+ MockEndpoint out = getMockEndpoint("mock:result");
+ out.expectedMessageCount(3);
+ out.expectedBodiesReceived("abc", "xyz", "åäö");
+
+ Files.copy(
+ getClass().getResourceAsStream("FileSplitXPathCharsetTest-input.xml"),
+ inputXml);
+
+ out.assertIsSatisfied();
+ }
+
+ @Override
+ protected RouteBuilder createRouteBuilder() {
+ return new RouteBuilder() {
+ @Override
+ public void configure() {
+ // input: *.csv
+ fromF("file:%s?charset=ISO-8859-1&include=.*\\.csv", TEST_DIR)
+ .split().tokenize(",")
+ .to("mock:result");
+
+ // input: *.xml
+ fromF("file:%s?charset=ISO-8859-1&include=.*\\.xml", TEST_DIR)
+ .split().xpath("/foo/bar/text()")
+ .to("mock:result");
+ }
+ };
+ }
+}
diff --git a/core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java
similarity index 91%
rename from core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
rename to core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java
index defdc00..e5b2958 100644
--- a/core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
+++ b/core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.camel.converter;
+package org.apache.camel.util;
import java.io.BufferedReader;
import java.io.File;
@@ -27,17 +27,17 @@ import java.nio.file.Paths;
import java.util.Arrays;
import org.apache.camel.ContextTestSupport;
-import org.apache.camel.util.IOHelper;
import org.junit.Test;
-public class IOConverterCharsetTest extends ContextTestSupport {
+public class IOHelperCharsetTest extends ContextTestSupport {
+
private static final String CONTENT = "G\u00f6tzend\u00e4mmerung,Joseph und seine Br\u00fcder";
@Test
public void testToInputStreamFileWithCharsetUTF8() throws Exception {
switchToDefaultCharset(StandardCharsets.UTF_8);
File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
- try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
+ try (InputStream in = IOHelper.toInputStream(file, "UTF-8");
BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
String line = reader.readLine();
@@ -51,7 +51,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
public void testToInputStreamFileWithCharsetUTF8withOtherDefaultEncoding() throws Exception {
switchToDefaultCharset(StandardCharsets.ISO_8859_1);
File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
- try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
+ try (InputStream in = IOHelper.toInputStream(file, "UTF-8");
BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
String line = reader.readLine();
@@ -65,9 +65,9 @@ public class IOConverterCharsetTest extends ContextTestSupport {
public void testToInputStreamFileWithCharsetLatin1() throws Exception {
switchToDefaultCharset(StandardCharsets.UTF_8);
File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
- try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
+ try (InputStream in = IOHelper.toInputStream(file, "ISO-8859-1");
BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.ISO_8859_1))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);
@@ -79,7 +79,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
public void testToInputStreamFileDirectByteDumpWithCharsetLatin1() throws Exception {
switchToDefaultCharset(StandardCharsets.UTF_8);
File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
- try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
+ try (InputStream in = IOHelper.toInputStream(file, "ISO-8859-1");
InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) {
byte[] bytes = new byte[8192];
in.read(bytes);
@@ -105,7 +105,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
public void testToReaderFileWithCharsetLatin1() throws Exception {
File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
try (BufferedReader reader = IOHelper.toReader(file, "ISO-8859-1");
- BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+ BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.ISO_8859_1))) {
String line = reader.readLine();
String naiveLine = naiveReader.readLine();
assertEquals(naiveLine, line);
diff --git a/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml b/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml
new file mode 100644
index 0000000..5ab8ecc
--- /dev/null
+++ b/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<foo>
+ <bar>abc</bar>
+ <bar>xyz</bar>
+ <bar>åäö</bar>
+</foo>
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java b/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java
index 0f883e8..ee52b06 100644
--- a/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java
+++ b/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java
@@ -617,8 +617,27 @@ public final class IOHelper {
}
}
+ /**
+ * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset
+ *
+ * @param file the file to be converted
+ * @param charset the charset the file is read with
+ * @return the input stream with the JVM default charset
+ */
+ public static InputStream toInputStream(File file, String charset) throws IOException {
+ if (charset != null) {
+ return new EncodingInputStream(file, charset);
+ } else {
+ return buffered(new FileInputStream(file));
+ }
+ }
+
public static BufferedReader toReader(File file, String charset) throws IOException {
FileInputStream in = new FileInputStream(file);
return IOHelper.buffered(new EncodingFileReader(in, charset));
}
+
+ public static BufferedWriter toWriter(FileOutputStream os, String charset) throws IOException {
+ return IOHelper.buffered(new EncodingFileWriter(os, charset));
+ }
}