You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by ac...@apache.org on 2019/05/15 06:58:59 UTC

[camel] 01/02: CAMEL-13136: File consumer with charset doesn't parse XML

This is an automated email from the ASF dual-hosted git repository.

acosentino pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/camel.git

commit 4344ab66b3d3256db3f097b7b553135592db4ea5
Author: Tadayoshi Sato <sa...@gmail.com>
AuthorDate: Tue May 14 20:42:20 2019 +0900

    CAMEL-13136: File consumer with charset doesn't parse XML
---
 .../camel/component/file/GenericFileConverter.java | 115 +--------------------
 .../org/apache/camel/converter/IOConverter.java    |  23 +----
 .../component/file/FileSplitXPathCharsetTest.java  |  84 +++++++++++++++
 .../IOHelperCharsetTest.java}                      |  18 ++--
 .../file/FileSplitXPathCharsetTest-input.xml       |   6 ++
 .../main/java/org/apache/camel/util/IOHelper.java  |  19 ++++
 6 files changed, 124 insertions(+), 141 deletions(-)

diff --git a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java
index b9cd8b8..706aaab 100644
--- a/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java
+++ b/components/camel-file/src/main/java/org/apache/camel/component/file/GenericFileConverter.java
@@ -125,7 +125,7 @@ public final class GenericFileConverter {
                 } else {
                     LOG.debug("Read file {} (no charset)", f);
                 }
-                return toInputStream(f, charset);
+                return IOHelper.toInputStream(f, charset);
             }
         }
         if (exchange != null) {
@@ -143,7 +143,7 @@ public final class GenericFileConverter {
         // use reader first as it supports the file charset
         BufferedReader reader = genericFileToReader(file, exchange);
         if (reader != null) {
-            return toString(reader);
+            return IOHelper.toString(reader);
         }
         if (exchange != null) {
             // otherwise ensure the body is loaded as we want the content of the body
@@ -184,120 +184,13 @@ public final class GenericFileConverter {
             String charset = file.getCharset();
             if (charset != null) {
                 LOG.debug("Read file {} with charset {}", f, file.getCharset());
-                return toReader(f, charset);
+                return IOHelper.toReader(f, charset);
             } else {
                 LOG.debug("Read file {} (no charset)", f);
-                return toReader(f, ExchangeHelper.getCharsetName(exchange));
+                return IOHelper.toReader(f, ExchangeHelper.getCharsetName(exchange));
             }
         }
         return null;
     }
 
-    private static BufferedReader toReader(File file, String charset) throws IOException {
-        FileInputStream in = new FileInputStream(file);
-        return IOHelper.buffered(new EncodingFileReader(in, charset));
-    }
-
-    private static InputStream toInputStream(File file, String charset) throws IOException {
-        if (charset != null) {
-            return new EncodingInputStream(file, charset);
-        } else {
-            return toInputStream(file);
-        }
-    }
-
-    private static InputStream toInputStream(File file) throws IOException {
-        return IOHelper.buffered(new FileInputStream(file));
-    }
-
-    private static String toString(BufferedReader reader) throws IOException {
-        StringBuilder sb = new StringBuilder(1024);
-        char[] buf = new char[1024];
-        try {
-            int len;
-            // read until we reach then end which is the -1 marker
-            while ((len = reader.read(buf)) != -1) {
-                sb.append(buf, 0, len);
-            }
-        } finally {
-            IOHelper.close(reader, "reader", LOG);
-        }
-
-        return sb.toString();
-    }
-
-    /**
-     * Encoding-aware file reader.
-     */
-    private static class EncodingFileReader extends InputStreamReader {
-
-        private final FileInputStream in;
-
-        /**
-         * @param in file to read
-         * @param charset character set to use
-         */
-        EncodingFileReader(FileInputStream in, String charset)
-            throws FileNotFoundException, UnsupportedEncodingException {
-            super(in, charset);
-            this.in = in;
-        }
-
-        @Override
-        public void close() throws IOException {
-            try {
-                super.close();
-            } finally {
-                in.close();
-            }
-        }
-    }
-
-    /**
-     * Encoding-aware input stream.
-     */
-    public static class EncodingInputStream extends InputStream {
-
-        private final File file;
-        private final BufferedReader reader;
-        private final Charset defaultStreamCharset;
-
-        private ByteBuffer bufferBytes;
-        private CharBuffer bufferedChars = CharBuffer.allocate(4096);
-
-        public EncodingInputStream(File file, String charset) throws IOException {
-            this.file = file;
-            reader = toReader(file, charset);
-            defaultStreamCharset = defaultCharset.get();
-        }
-
-        @Override
-        public int read() throws IOException {
-            if (bufferBytes == null || bufferBytes.remaining() <= 0) {
-                bufferedChars.clear();
-                int len = reader.read(bufferedChars);
-                bufferedChars.flip();
-                if (len == -1) {
-                    return -1;
-                }
-                bufferBytes = defaultStreamCharset.encode(bufferedChars);
-            }
-            return bufferBytes.get();
-        }
-
-        @Override
-        public void close() throws IOException {
-            reader.close();
-        }
-
-        @Override
-        public void reset() throws IOException {
-            reader.reset();
-        }
-
-        public InputStream toOriginalInputStream() throws FileNotFoundException {
-            return new FileInputStream(file);
-        }
-    }
-
 }
diff --git a/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java b/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java
index 7bd8590..8f99cbe 100644
--- a/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java
+++ b/core/camel-base/src/main/java/org/apache/camel/converter/IOConverter.java
@@ -73,21 +73,6 @@ public final class IOConverter {
         return IOHelper.buffered(new FileInputStream(file));
     }
 
-    /**
-     * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset
-     *
-     * @param file the file to be converted
-     * @param charset the charset the file is read with
-     * @return the input stream with the JVM default charset
-     */
-    public static InputStream toInputStream(File file, String charset) throws IOException {
-        if (charset != null) {
-            return new IOHelper.EncodingInputStream(file, charset);
-        } else {
-            return toInputStream(file);
-        }
-    }
-
     @Converter
     public static BufferedReader toReader(File file, Exchange exchange) throws IOException {
         return IOHelper.toReader(file, ExchangeHelper.getCharsetName(exchange));
@@ -106,15 +91,11 @@ public final class IOConverter {
     @Converter
     public static BufferedWriter toWriter(File file, Exchange exchange) throws IOException {
         FileOutputStream os = new FileOutputStream(file, false);
-        return toWriter(os, ExchangeHelper.getCharsetName(exchange));
+        return IOHelper.toWriter(os, ExchangeHelper.getCharsetName(exchange));
     }
 
     public static BufferedWriter toWriter(File file, boolean append, String charset) throws IOException {
-        return toWriter(new FileOutputStream(file, append), charset);
-    }
-
-    public static BufferedWriter toWriter(FileOutputStream os, String charset) throws IOException {
-        return IOHelper.buffered(new IOHelper.EncodingFileWriter(os, charset));
+        return IOHelper.toWriter(new FileOutputStream(file, append), charset);
     }
 
     @Converter
diff --git a/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java b/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java
new file mode 100644
index 0000000..e61fe43
--- /dev/null
+++ b/core/camel-core/src/test/java/org/apache/camel/component/file/FileSplitXPathCharsetTest.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.file;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.camel.ContextTestSupport;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class FileSplitXPathCharsetTest extends ContextTestSupport {
+
+    private static final String TEST_DIR = "target/data/file-split-xpath-charset";
+
+    private static Path inputCsv = Paths.get(TEST_DIR, "input.csv");
+    private static Path inputXml = Paths.get(TEST_DIR, "input.xml");
+
+    @BeforeClass
+    public static void clearInputFiles() throws IOException {
+        deleteDirectory(TEST_DIR);
+    }
+
+    @Test
+    public void testCsv() throws Exception {
+        MockEndpoint out = getMockEndpoint("mock:result");
+        out.expectedMessageCount(3);
+        out.expectedBodiesReceived("abc", "xyz", "åäö");
+
+        Files.write(inputCsv, "abc,xyz,åäö".getBytes(StandardCharsets.ISO_8859_1));
+
+        out.assertIsSatisfied();
+    }
+
+    @Test
+    public void testXml() throws Exception {
+        MockEndpoint out = getMockEndpoint("mock:result");
+        out.expectedMessageCount(3);
+        out.expectedBodiesReceived("abc", "xyz", "åäö");
+
+        Files.copy(
+                getClass().getResourceAsStream("FileSplitXPathCharsetTest-input.xml"),
+                inputXml);
+
+        out.assertIsSatisfied();
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() {
+        return new RouteBuilder() {
+            @Override
+            public void configure() {
+                // input: *.csv
+                fromF("file:%s?charset=ISO-8859-1&include=.*\\.csv", TEST_DIR)
+                    .split().tokenize(",")
+                        .to("mock:result");
+
+                // input: *.xml
+                fromF("file:%s?charset=ISO-8859-1&include=.*\\.xml", TEST_DIR)
+                    .split().xpath("/foo/bar/text()")
+                        .to("mock:result");
+            }
+        };
+    }
+}
diff --git a/core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java b/core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java
similarity index 91%
rename from core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
rename to core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java
index defdc00..e5b2958 100644
--- a/core/camel-core/src/test/java/org/apache/camel/converter/IOConverterCharsetTest.java
+++ b/core/camel-core/src/test/java/org/apache/camel/util/IOHelperCharsetTest.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.camel.converter;
+package org.apache.camel.util;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -27,17 +27,17 @@ import java.nio.file.Paths;
 import java.util.Arrays;
 
 import org.apache.camel.ContextTestSupport;
-import org.apache.camel.util.IOHelper;
 import org.junit.Test;
 
-public class IOConverterCharsetTest extends ContextTestSupport {
+public class IOHelperCharsetTest extends ContextTestSupport {
+
     private static final String CONTENT = "G\u00f6tzend\u00e4mmerung,Joseph und seine Br\u00fcder";
 
     @Test
     public void testToInputStreamFileWithCharsetUTF8() throws Exception {
         switchToDefaultCharset(StandardCharsets.UTF_8);
         File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
-        try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
+        try (InputStream in = IOHelper.toInputStream(file, "UTF-8");
              BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
              BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
             String line = reader.readLine();
@@ -51,7 +51,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
     public void testToInputStreamFileWithCharsetUTF8withOtherDefaultEncoding() throws Exception {
         switchToDefaultCharset(StandardCharsets.ISO_8859_1);
         File file = new File("src/test/resources/org/apache/camel/converter/german.utf-8.txt");
-        try (InputStream in = IOConverter.toInputStream(file, "UTF-8");
+        try (InputStream in = IOHelper.toInputStream(file, "UTF-8");
              BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.ISO_8859_1));
              BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.UTF_8))) {
             String line = reader.readLine();
@@ -65,9 +65,9 @@ public class IOConverterCharsetTest extends ContextTestSupport {
     public void testToInputStreamFileWithCharsetLatin1() throws Exception {
         switchToDefaultCharset(StandardCharsets.UTF_8);
         File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
-        try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
+        try (InputStream in = IOHelper.toInputStream(file, "ISO-8859-1");
              BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
-             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.ISO_8859_1))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);
@@ -79,7 +79,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
     public void testToInputStreamFileDirectByteDumpWithCharsetLatin1() throws Exception {
         switchToDefaultCharset(StandardCharsets.UTF_8);
         File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
-        try (InputStream in = IOConverter.toInputStream(file, "ISO-8859-1");
+        try (InputStream in = IOHelper.toInputStream(file, "ISO-8859-1");
              InputStream naiveIn = Files.newInputStream(Paths.get(file.getAbsolutePath()))) {
             byte[] bytes = new byte[8192];
             in.read(bytes);
@@ -105,7 +105,7 @@ public class IOConverterCharsetTest extends ContextTestSupport {
     public void testToReaderFileWithCharsetLatin1() throws Exception {
         File file = new File("src/test/resources/org/apache/camel/converter/german.iso-8859-1.txt");
         try (BufferedReader reader = IOHelper.toReader(file, "ISO-8859-1");
-             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), "ISO-8859-1"))) {
+             BufferedReader naiveReader = new BufferedReader(new InputStreamReader(Files.newInputStream(Paths.get(file.getAbsolutePath())), StandardCharsets.ISO_8859_1))) {
             String line = reader.readLine();
             String naiveLine = naiveReader.readLine();
             assertEquals(naiveLine, line);
diff --git a/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml b/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml
new file mode 100644
index 0000000..5ab8ecc
--- /dev/null
+++ b/core/camel-core/src/test/resources/org/apache/camel/component/file/FileSplitXPathCharsetTest-input.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<foo>
+  <bar>abc</bar>
+  <bar>xyz</bar>
+  <bar>åäö</bar>
+</foo>
diff --git a/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java b/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java
index 0f883e8..ee52b06 100644
--- a/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java
+++ b/core/camel-util/src/main/java/org/apache/camel/util/IOHelper.java
@@ -617,8 +617,27 @@ public final class IOHelper {
         }
     }
 
+    /**
+     * Converts the given {@link File} with the given charset to {@link InputStream} with the JVM default charset
+     *
+     * @param file the file to be converted
+     * @param charset the charset the file is read with
+     * @return the input stream with the JVM default charset
+     */
+    public static InputStream toInputStream(File file, String charset) throws IOException {
+        if (charset != null) {
+            return new EncodingInputStream(file, charset);
+        } else {
+            return buffered(new FileInputStream(file));
+        }
+    }
+
     public static BufferedReader toReader(File file, String charset) throws IOException {
         FileInputStream in = new FileInputStream(file);
         return IOHelper.buffered(new EncodingFileReader(in, charset));
     }
+
+    public static BufferedWriter toWriter(FileOutputStream os, String charset) throws IOException {
+        return IOHelper.buffered(new EncodingFileWriter(os, charset));
+    }
 }