You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2017/01/29 16:54:33 UTC

[3/6] camel git commit: CAMEL-10740 - Code cleanup and encoding support.

CAMEL-10740 - Code cleanup and encoding support.

Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/c73068e7
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/c73068e7
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/c73068e7

Branch: refs/heads/master
Commit: c73068e7d42f5f8a83b218463389383d6fb26837
Parents: 17c83ba
Author: Bob Paulin <bo...@bobpaulin.com>
Authored: Sat Jan 28 23:58:12 2017 -0600
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:06:27 2017 +0100

----------------------------------------------------------------------
 components/camel-tika/pom.xml                   | 141 +++++++++----------
 .../src/main/docs/tika-component.adoc           |   8 +-
 .../camel/component/tika/TikaConfiguration.java |  26 +++-
 .../camel/component/tika/TikaEndpoint.java      |   2 +-
 .../camel/component/tika/TikaProducer.java      |  38 ++---
 .../camel/component/tika/TikaParseTest.java     |  67 ++++++++-
 .../src/test/resources/testOpenOffice2.odt      | Bin 0 -> 26460 bytes
 7 files changed, 175 insertions(+), 107 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/pom.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index 86f0131..6233b9f 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -15,80 +15,79 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
-    <modelVersion>4.0.0</modelVersion>
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.camel</groupId>
+    <artifactId>components</artifactId>
+    <version>2.19.0-SNAPSHOT</version>
+  </parent>
 
-    <parent>
-        <groupId>org.apache.camel</groupId>
-        <artifactId>components</artifactId>
-        <version>2.19.0-SNAPSHOT</version>
-    </parent>
+  <artifactId>camel-tika</artifactId>
+  <packaging>jar</packaging>
+  <name>Camel :: Tika</name>
+  <description>This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
 
-    <artifactId>camel-tika</artifactId>
-    <packaging>jar</packaging>
-    <name>Camel :: Tika</name>
-    <description>This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
+  <properties>
+    <camel.osgi.export.pkg>org.apache.camel.component.tika.*</camel.osgi.export.pkg>
+    <camel.osgi.export.service>org.apache.camel.spi.ComponentResolver;component=tika</camel.osgi.export.service>
+  </properties>
 
-    <properties>
-        <camel.osgi.export.pkg>org.apache.camel.component.tika.*</camel.osgi.export.pkg>
-        <camel.osgi.export.service>org.apache.camel.spi.ComponentResolver;component=tika</camel.osgi.export.service>
-    </properties>
-
-    <dependencies>
-
-        <dependency>
-            <groupId>org.apache.camel</groupId>
-            <artifactId>camel-core</artifactId>
-        </dependency>
-        <dependency>
-		    <groupId>org.apache.tika</groupId>
-		    <artifactId>tika-core</artifactId>
-		    <version>${tika-version}</version>
-		</dependency>
-		<dependency>
-		    <groupId>org.apache.tika</groupId>
-		    <artifactId>tika-parsers</artifactId>
-		    <version>${tika-version}</version>
-		</dependency>
-        <!-- test dependencies -->
-        <dependency>
-            <groupId>org.apache.camel</groupId>
-            <artifactId>camel-test-spring</artifactId>
-            <scope>test</scope>
-        </dependency>  
-        <dependency>
-            <groupId>org.apache.logging.log4j</groupId>
-            <artifactId>log4j-api</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.logging.log4j</groupId>
-            <artifactId>log4j-core</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.logging.log4j</groupId>
-            <artifactId>log4j-slf4j-impl</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-        	<groupId>commons-io</groupId>
-        	<artifactId>commons-io</artifactId>
-        	<version>${commons-io-version}</version>
-        	<scope>test</scope>
-        </dependency>
-        <dependency>
-	      <groupId>org.hamcrest</groupId>
-	      <artifactId>java-hamcrest</artifactId>
-	      <version>${hamcrest-version}</version>
-	      <scope>test</scope>
-	    </dependency>
-    </dependencies>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.camel</groupId>
+      <artifactId>camel-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${tika-version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+      <version>${tika-version}</version>
+    </dependency>
+    <!-- test dependencies -->
+    <dependency>
+      <groupId>org.apache.camel</groupId>
+      <artifactId>camel-test-spring</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-slf4j-impl</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons-io-version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hamcrest</groupId>
+      <artifactId>java-hamcrest</artifactId>
+      <version>${hamcrest-version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
 
 </project>

http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/docs/tika-component.adoc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/docs/tika-component.adoc b/components/camel-tika/src/main/docs/tika-component.adoc
index 7049a59..f077452 100644
--- a/components/camel-tika/src/main/docs/tika-component.adoc
+++ b/components/camel-tika/src/main/docs/tika-component.adoc
@@ -41,7 +41,7 @@ The Tika component has no options.
 
 
 // endpoint options: START
-The Tika component supports 5 endpoint options which are listed below:
+The Tika component supports 6 endpoint options which are listed below:
 
 {% raw %}
 [width="100%",cols="2,1,1m,1m,5",options="header"]
@@ -49,8 +49,9 @@ The Tika component supports 5 endpoint options which are listed below:
 | Name | Group | Default | Java Type | Description
 | operation | producer |  | TikaOperation | *Required* Tika Operation. parse or detect
 | tikaConfig | producer |  | TikaConfig | Tika Config
-| tikaConfigUri | producer |  | String | Tika Config Uri
-| tikaParseOutputFormat | producer | xml | TikaParseOutputFormat | Tika Output Format. Supported output formats are xml html text textMain
+| tikaConfigUri | producer |  | String | Tika Config Uri: The URI of tika-config.xml
+| tikaParseOutputEncoding | producer |  | String | Tika Parse Output Encoding - Used to specify the character encoding of the parsed output. Defaults to Charset.defaultCharset().
+| tikaParseOutputFormat | producer | xml | TikaParseOutputFormat | Tika Output Format. The supported output formats are: xml (returns parsed content as XML), html (returns parsed content as HTML), text (returns parsed content as text), and textMain (uses the boilerpipe library to automatically extract the main content from a web page).
 | synchronous | advanced | false | boolean | Sets whether synchronous processing should be strictly used or Camel is allowed to use asynchronous processing (if supported).
 |=======================================================================
 {% endraw %}
@@ -61,7 +62,6 @@ The Tika component supports 5 endpoint options which are listed below:
 [width="100%",cols="10%,90%",options="header",]
 |=======================================================================
 |Header |Description
-|TikaXXXX | Any Tika Metadata Header is converted to a Camel Header with Prefix Tika
 |=======================================================================
 
 ### To Detect a file's MIME Type

http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
index 051ad2a..33542c0 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
@@ -17,6 +17,7 @@
 package org.apache.camel.component.tika;
 
 import java.io.IOException;
+import java.nio.charset.Charset;
 
 import org.xml.sax.SAXException;
 
@@ -36,6 +37,8 @@ public class TikaConfiguration {
     private TikaOperation operation;
     @UriParam(defaultValue = "xml")
     private TikaParseOutputFormat tikaParseOutputFormat = TikaParseOutputFormat.xml;
+    @UriParam(description = "Tika Parse Output Encoding")
+    private String tikaParseOutputEncoding = Charset.defaultCharset().name();
     @UriParam(description = "Tika Config")
     private TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
     @UriParam(description = "Tika Config Url")
@@ -64,12 +67,31 @@ public class TikaConfiguration {
 
     /**
      * 
-     * Tika Output Format. Supported output formats are xml, html, text, textMain
+     * Tika Output Format. Supported output formats:
+     * <ul>
+     *   <li>xml: Returns Parsed Content as XML. </li>
+     *   <li>html: Returns Parsed Content as HTML. </li>
+     *   <li>text: Returns Parsed Content as Text. </li>
+     *   <li>textMain: Uses the <a href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to automatically extract the main content from a web page. </li>
+     * </ul>
      * 
      */
     public void setTikaParseOutputFormat(TikaParseOutputFormat tikaParseOutputFormat) {
         this.tikaParseOutputFormat = tikaParseOutputFormat;
     }
+    
+    public String getTikaParseOutputEncoding() {
+        return tikaParseOutputEncoding;
+    }
+    
+    /**
+     * Tika Parse Output Encoding - Used to specify the character encoding of the parsed output.
+     * Defaults to Charset.defaultCharset().
+     * 
+     */
+    public void setTikaParseOutputEncoding(String tikaParseOutputEncoding) {
+        this.tikaParseOutputEncoding = tikaParseOutputEncoding;
+    }
 
     public TikaConfig getTikaConfig() {
         return tikaConfig;
@@ -90,7 +112,7 @@ public class TikaConfiguration {
 
     /**
      * 
-     * Tika Config Uri
+     * Tika Config Uri: The URI of tika-config.xml
      * 
      */
     public void setTikaConfigUri(String tikaConfigUri) throws TikaException, IOException, SAXException {

http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
index cb8fbdd..a1701d3 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
@@ -24,7 +24,7 @@ import org.apache.camel.impl.DefaultEndpoint;
 import org.apache.camel.spi.UriEndpoint;
 import org.apache.camel.spi.UriParam;
 
-@UriEndpoint(scheme = "tika", title = "Tika", syntax = "tika:operation", producerOnly = true, label = "tika")
+@UriEndpoint(scheme = "tika", title = "Tika", syntax = "tika:operation", producerOnly = true, label = "transformation")
 public class TikaEndpoint extends DefaultEndpoint {
 
     @UriParam

http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
index 1e0d9ca..309df98 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -22,10 +22,6 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.UnsupportedEncodingException;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.Locale;
 
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.TransformerConfigurationException;
@@ -57,10 +53,13 @@ public class TikaProducer extends DefaultProducer {
     private final Parser parser;
 
     private final Detector detector;
+    
+    private final String encoding;
 
     public TikaProducer(TikaEndpoint endpoint) {
         super(endpoint);
         this.tikaConfiguration = endpoint.getTikaConfiguration();
+        this.encoding = this.tikaConfiguration.getTikaParseOutputEncoding();
         TikaConfig config = this.tikaConfiguration.getTikaConfig();
         this.parser = new AutoDetectParser(config);
         this.detector = config.getDetector();
@@ -111,7 +110,7 @@ public class TikaProducer extends DefaultProducer {
     private void convertMetadataToHeaders(Metadata metadata, Exchange exchange) {
         if (metadata != null) {
             for (String metaname : metadata.names()) {
-                exchange.getIn().setHeader("Tika" + metaname, metadata.get(metaname));
+                exchange.getIn().setHeader(metaname, metadata.get(metaname));
             }
         }
     }
@@ -122,19 +121,18 @@ public class TikaProducer extends DefaultProducer {
         ContentHandler result = null;
 
         TikaParseOutputFormat outputFormat = configuration.getTikaParseOutputFormat();
-        String encoding = Charset.defaultCharset().name();
         switch (outputFormat) {
         case xml:
-            result = getTransformerHandler(outputStream, "xml", encoding, true);
+            result = getTransformerHandler(outputStream, "xml", true);
             break;
         case text:
-            result = new BodyContentHandler(outputStream);
+            result = new BodyContentHandler(new OutputStreamWriter(outputStream, this.encoding));
             break;
         case textMain:
-            result = new BoilerpipeContentHandler(getOutputWriter(outputStream, encoding));
+            result = new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, this.encoding));
             break;
         case html:
-            result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", encoding, true));
+            result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
             break;
         default:
             throw new IllegalArgumentException(
@@ -143,26 +141,16 @@ public class TikaProducer extends DefaultProducer {
         return result;
     }
 
-    private TransformerHandler getTransformerHandler(OutputStream output, String method, String encoding,
-            boolean prettyPrint) throws TransformerConfigurationException {
+    private TransformerHandler getTransformerHandler(OutputStream output, String method,
+            boolean prettyPrint) throws TransformerConfigurationException, UnsupportedEncodingException {
         SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
         TransformerHandler handler = factory.newTransformerHandler();
         handler.getTransformer().setOutputProperty(OutputKeys.METHOD, method);
         handler.getTransformer().setOutputProperty(OutputKeys.INDENT, prettyPrint ? "yes" : "no");
-        if (encoding != null) {
-            handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, encoding);
+        if (this.encoding != null) {
+            handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, this.encoding);
         }
-        handler.setResult(new StreamResult(output));
+        handler.setResult(new StreamResult(new OutputStreamWriter(output, this.encoding)));
         return handler;
     }
-
-    private Writer getOutputWriter(OutputStream output, String encoding) throws UnsupportedEncodingException {
-        if (encoding != null) {
-            return new OutputStreamWriter(output, encoding);
-        } else if (System.getProperty("os.name").toLowerCase(Locale.ROOT).startsWith("mac os x")) {
-            return new OutputStreamWriter(output, StandardCharsets.UTF_8);
-        } else {
-            return new OutputStreamWriter(output, Charset.defaultCharset());
-        }
-    }
 }

http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
index dc6d97e..1db2a8d 100644
--- a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
@@ -16,7 +16,15 @@
  */
 package org.apache.camel.component.tika;
 
+import java.io.ByteArrayInputStream;
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.Map;
 
 import org.apache.camel.EndpointInject;
@@ -26,7 +34,11 @@ import org.apache.camel.builder.RouteBuilder;
 import org.apache.camel.component.mock.MockEndpoint;
 import org.apache.camel.impl.JndiRegistry;
 import org.apache.camel.test.junit4.CamelTestSupport;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.txt.UniversalEncodingDetector;
 import org.junit.Test;
+import org.mozilla.universalchardet.UniversalDetector;
+
 import static org.hamcrest.Matchers.*;
 
 public class TikaParseTest extends CamelTestSupport {
@@ -48,8 +60,54 @@ public class TikaParseTest extends CamelTestSupport {
                 Object body = exchange.getIn().getBody(String.class);
                 Map<String, Object> headerMap = exchange.getIn().getHeaders();
                 assertThat(body, instanceOf(String.class));
+                
+                Charset detectedCharset = null;
+                try {
+                    InputStream bodyIs = new ByteArrayInputStream(((String)body).getBytes());
+                    UniversalEncodingDetector encodingDetector = new UniversalEncodingDetector();
+                    detectedCharset = encodingDetector.detect(bodyIs, new Metadata());
+                } catch (IOException e1) {
+                    fail();
+                }
+                
+                
+                assertThat(detectedCharset.name(), startsWith(Charset.defaultCharset().name()));
+                
                 assertThat((String) body, containsString("test"));
-                assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+                assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/msword"));
+                return true;
+            }
+        });
+        resultEndpoint.assertIsSatisfied();
+    }
+    
+    @Test
+    public void testDocumentParseWithEncoding() throws Exception {
+
+        File document = new File("src/test/resources/testOpenOffice2.odt");
+        template.sendBody("direct:start4", document);
+
+        resultEndpoint.setExpectedMessageCount(1);
+
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object body = exchange.getIn().getBody(String.class);
+                Map<String, Object> headerMap = exchange.getIn().getHeaders();
+                assertThat(body, instanceOf(String.class));
+                
+                Charset detectedCharset = null;
+                try {
+                    InputStream bodyIs = new ByteArrayInputStream(((String)body).getBytes(StandardCharsets.UTF_16));
+                    UniversalEncodingDetector encodingDetector = new UniversalEncodingDetector();
+                    detectedCharset = encodingDetector.detect(bodyIs, new Metadata());
+                } catch (IOException e1) {
+                    fail();
+                }
+                
+                
+                assertThat(detectedCharset.name(), startsWith(StandardCharsets.UTF_16.name()));
+                assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/vnd.oasis.opendocument.text"));
                 return true;
             }
         });
@@ -70,7 +128,7 @@ public class TikaParseTest extends CamelTestSupport {
                 Map<String, Object> headerMap = exchange.getIn().getHeaders();
                 assertThat(body, instanceOf(String.class));
                 assertThat((String) body, containsString("<body/>"));
-                assertThat(headerMap.get("TikaContent-Type"), equalTo("image/gif"));
+                assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("image/gif"));
                 return true;
             }
         });
@@ -91,7 +149,7 @@ public class TikaParseTest extends CamelTestSupport {
                 Map<String, Object> headerMap = exchange.getIn().getHeaders();
                 assertThat(body, instanceOf(String.class));
                 assertThat((String) body, containsString("<body/>"));
-                assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+                assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/msword"));
                 return true;
             }
         });
@@ -112,7 +170,7 @@ public class TikaParseTest extends CamelTestSupport {
                 Map<String, Object> headerMap = exchange.getIn().getHeaders();
                 assertThat(body, instanceOf(String.class));
                 assertThat((String) body, containsString("<body/>"));
-                assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+                assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/msword"));
                 return true;
             }
         });
@@ -128,6 +186,7 @@ public class TikaParseTest extends CamelTestSupport {
                 from("direct:start2").to("tika:parse?tikaConfigUri=src/test/resources/tika-empty.xml")
                         .to("mock:result");
                 from("direct:start3").to("tika:parse?tikaConfig=#testConfig").to("mock:result");
+                from("direct:start4").to("tika:parse?tikaParseOutputEncoding=" + StandardCharsets.UTF_16.name()).to("mock:result");
             }
         };
     }

http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/test/resources/testOpenOffice2.odt
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/testOpenOffice2.odt b/components/camel-tika/src/test/resources/testOpenOffice2.odt
new file mode 100644
index 0000000..0b1bb11
Binary files /dev/null and b/components/camel-tika/src/test/resources/testOpenOffice2.odt differ