You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by da...@apache.org on 2017/01/29 16:54:31 UTC
[1/6] camel git commit: CAMEL-10740 - Added JavaDoc to TikaParseOutputFormat
Repository: camel
Updated Branches:
refs/heads/master 9c44f6ac7 -> 53c98ed09
CAMEL-10740 - Added JavaDoc to TikaParseOutputFormat
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/4c36002d
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/4c36002d
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/4c36002d
Branch: refs/heads/master
Commit: 4c36002db9ae56bc3c796e10aa461c6f022d2346
Parents: c73068e
Author: Bob Paulin <bo...@bobpaulin.com>
Authored: Sun Jan 29 00:02:37 2017 -0600
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:06:27 2017 +0100
----------------------------------------------------------------------
.../camel/component/tika/TikaParseOutputFormat.java | 11 +++++++++++
1 file changed, 11 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/4c36002d/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
index 82005be..4b42d4f 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -16,6 +16,17 @@
*/
package org.apache.camel.component.tika;
+/**
+ *
+ * Tika Output Format. Supported output formats.
+ * <ul>
+ * <li>xml: Returns Parsed Content as XML. </li>
+ * <li>html: Returns Parsed Content as HTML. </li>
+ * <li>text: Returns Parsed Content as Text. </li>
+ * <li>textMain: Uses the <a href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to automatically extract the main content from a web page. </li>
+ * </ul>
+ *
+ */
public enum TikaParseOutputFormat {
xml, html, text, textMain;
}
\ No newline at end of file
[6/6] camel git commit: CAMEL-10740: Polished. This closes #1422
Posted by da...@apache.org.
CAMEL-10740: Polished. This closes #1422
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/53c98ed0
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/53c98ed0
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/53c98ed0
Branch: refs/heads/master
Commit: 53c98ed09aa2430d14233d0392b49a8824ff3fa9
Parents: b24af35
Author: Claus Ibsen <da...@apache.org>
Authored: Sun Jan 29 17:50:31 2017 +0100
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:54:19 2017 +0100
----------------------------------------------------------------------
components/camel-tika/pom.xml | 3 +++
.../src/main/docs/tika-component.adoc | 5 ----
.../src/test/resources/log4j2.properties | 28 ++++++++++++++++++++
3 files changed, 31 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/53c98ed0/components/camel-tika/pom.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index 6233b9f..08a605e 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -36,10 +36,12 @@
</properties>
<dependencies>
+
<dependency>
<groupId>org.apache.camel</groupId>
<artifactId>camel-core</artifactId>
</dependency>
+
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
@@ -50,6 +52,7 @@
<artifactId>tika-parsers</artifactId>
<version>${tika-version}</version>
</dependency>
+
<!-- test dependencies -->
<dependency>
<groupId>org.apache.camel</groupId>
http://git-wip-us.apache.org/repos/asf/camel/blob/53c98ed0/components/camel-tika/src/main/docs/tika-component.adoc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/docs/tika-component.adoc b/components/camel-tika/src/main/docs/tika-component.adoc
index f077452..d228464 100644
--- a/components/camel-tika/src/main/docs/tika-component.adoc
+++ b/components/camel-tika/src/main/docs/tika-component.adoc
@@ -1,5 +1,4 @@
## Tika Component
-### TIKA
**Available as of Camel 2.19.0**
@@ -90,7 +89,3 @@ from("direct:start")
* link:component.html[Component]
* link:endpoint.html[Endpoint]
* link:getting-started.html[Getting Started]
-
--
-
--
http://git-wip-us.apache.org/repos/asf/camel/blob/53c98ed0/components/camel-tika/src/test/resources/log4j2.properties
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/log4j2.properties b/components/camel-tika/src/test/resources/log4j2.properties
new file mode 100644
index 0000000..3b1bd0d
--- /dev/null
+++ b/components/camel-tika/src/test/resources/log4j2.properties
@@ -0,0 +1,28 @@
+## ---------------------------------------------------------------------------
+## Licensed to the Apache Software Foundation (ASF) under one or more
+## contributor license agreements. See the NOTICE file distributed with
+## this work for additional information regarding copyright ownership.
+## The ASF licenses this file to You under the Apache License, Version 2.0
+## (the "License"); you may not use this file except in compliance with
+## the License. You may obtain a copy of the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+## ---------------------------------------------------------------------------
+
+appender.file.type = File
+appender.file.name = file
+appender.file.fileName = target/camel-tika-test.log
+appender.file.layout.type = PatternLayout
+appender.file.layout.pattern = %d [%-15.15t] %-5p %-30.30c{1} - %m%n
+appender.out.type = Console
+appender.out.name = out
+appender.out.layout.type = PatternLayout
+appender.out.layout.pattern = [%30.30t] %-30.30c{1} %-5p %m%n
+rootLogger.level = INFO
+rootLogger.appenderRef.file.ref = file
[5/6] camel git commit: CAMEL-10740: Added karaf itest
Posted by da...@apache.org.
CAMEL-10740: Added karaf itest
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/b24af35f
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/b24af35f
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/b24af35f
Branch: refs/heads/master
Commit: b24af35f986c4272e5073e54c5164932057bf0d0
Parents: 13a7c07
Author: Claus Ibsen <da...@apache.org>
Authored: Sun Jan 29 17:47:54 2017 +0100
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:47:54 2017 +0100
----------------------------------------------------------------------
.../apache/camel/itest/karaf/CamelTikaTest.java | 33 ++++++++++++++++++++
1 file changed, 33 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/b24af35f/tests/camel-itest-karaf/src/test/java/org/apache/camel/itest/karaf/CamelTikaTest.java
----------------------------------------------------------------------
diff --git a/tests/camel-itest-karaf/src/test/java/org/apache/camel/itest/karaf/CamelTikaTest.java b/tests/camel-itest-karaf/src/test/java/org/apache/camel/itest/karaf/CamelTikaTest.java
new file mode 100644
index 0000000..be57b59
--- /dev/null
+++ b/tests/camel-itest-karaf/src/test/java/org/apache/camel/itest/karaf/CamelTikaTest.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.itest.karaf;
+
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.ops4j.pax.exam.junit.PaxExam;
+
+@RunWith(PaxExam.class)
+public class CamelTikaTest extends BaseKarafTest {
+
+ public static final String COMPONENT = extractName(CamelTikaTest.class);
+
+ @Test
+ public void test() throws Exception {
+ testComponent(COMPONENT);
+ }
+
+}
\ No newline at end of file
[4/6] camel git commit: CAMEL-10740: Regen code
Posted by da...@apache.org.
CAMEL-10740: Regen code
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/13a7c07d
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/13a7c07d
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/13a7c07d
Branch: refs/heads/master
Commit: 13a7c07d6f70da0599d7d34b2f89b00786a503b7
Parents: 4c36002
Author: Claus Ibsen <da...@apache.org>
Authored: Sun Jan 29 17:44:39 2017 +0100
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:44:39 2017 +0100
----------------------------------------------------------------------
.../camel-tika-starter/pom.xml | 59 ++++++
.../TikaComponentAutoConfiguration.java | 80 ++++++++
.../src/main/resources/META-INF/LICENSE.txt | 203 +++++++++++++++++++
.../src/main/resources/META-INF/NOTICE.txt | 11 +
...dditional-spring-configuration-metadata.json | 10 +
.../main/resources/META-INF/spring.factories | 19 ++
.../src/main/resources/META-INF/spring.provides | 18 ++
.../spring-boot/components-starter/pom.xml | 1 +
.../camel-spring-boot-dependencies/pom.xml | 5 +
9 files changed, 406 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/pom.xml
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/pom.xml b/platforms/spring-boot/components-starter/camel-tika-starter/pom.xml
new file mode 100644
index 0000000..0302fce
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>components-starter</artifactId>
+ <version>2.19.0-SNAPSHOT</version>
+ </parent>
+ <artifactId>camel-tika-starter</artifactId>
+ <packaging>jar</packaging>
+ <name>Spring-Boot Starter :: Camel :: Tika</name>
+ <description>Spring-Boot Starter for This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
+ <dependencies>
+ <dependency>
+ <groupId>org.springframework.boot</groupId>
+ <artifactId>spring-boot-starter</artifactId>
+ <version>${spring-boot-version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-tika</artifactId>
+ <version>${project.version}</version>
+ <!--START OF GENERATED CODE-->
+ <exclusions>
+ <exclusion>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ </exclusion>
+ </exclusions>
+ <!--END OF GENERATED CODE-->
+ </dependency>
+ <!--START OF GENERATED CODE-->
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-core-starter</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-spring-boot-starter</artifactId>
+ </dependency>
+ <!--END OF GENERATED CODE-->
+ </dependencies>
+</project>
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/src/main/java/org/apache/camel/component/tika/springboot/TikaComponentAutoConfiguration.java
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/src/main/java/org/apache/camel/component/tika/springboot/TikaComponentAutoConfiguration.java b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/java/org/apache/camel/component/tika/springboot/TikaComponentAutoConfiguration.java
new file mode 100644
index 0000000..72c4d56
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/java/org/apache/camel/component/tika/springboot/TikaComponentAutoConfiguration.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika.springboot;
+
+import org.apache.camel.CamelContext;
+import org.apache.camel.component.tika.TikaComponent;
+import org.springframework.boot.autoconfigure.AutoConfigureAfter;
+import org.springframework.boot.autoconfigure.condition.ConditionMessage;
+import org.springframework.boot.autoconfigure.condition.ConditionOutcome;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnBean;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
+import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
+import org.springframework.boot.autoconfigure.condition.SpringBootCondition;
+import org.springframework.boot.bind.RelaxedPropertyResolver;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.ConditionContext;
+import org.springframework.context.annotation.Conditional;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Lazy;
+import org.springframework.core.type.AnnotatedTypeMetadata;
+
+/**
+ * Generated by camel-package-maven-plugin - do not edit this file!
+ */
+@Configuration
+@ConditionalOnBean(type = "org.apache.camel.spring.boot.CamelAutoConfiguration")
+@Conditional(TikaComponentAutoConfiguration.Condition.class)
+@AutoConfigureAfter(name = "org.apache.camel.spring.boot.CamelAutoConfiguration")
+public class TikaComponentAutoConfiguration {
+
+ @Lazy
+ @Bean(name = "tika-component")
+ @ConditionalOnClass(CamelContext.class)
+ @ConditionalOnMissingBean(TikaComponent.class)
+ public TikaComponent configureTikaComponent(CamelContext camelContext)
+ throws Exception {
+ TikaComponent component = new TikaComponent();
+ component.setCamelContext(camelContext);
+ return component;
+ }
+
+ public static class Condition extends SpringBootCondition {
+ @Override
+ public ConditionOutcome getMatchOutcome(
+ ConditionContext conditionContext,
+ AnnotatedTypeMetadata annotatedTypeMetadata) {
+ boolean groupEnabled = isEnabled(conditionContext,
+ "camel.component.", true);
+ ConditionMessage.Builder message = ConditionMessage
+ .forCondition("camel.component.tika");
+ if (isEnabled(conditionContext, "camel.component.tika.",
+ groupEnabled)) {
+ return ConditionOutcome.match(message.because("enabled"));
+ }
+ return ConditionOutcome.noMatch(message.because("not enabled"));
+ }
+
+ private boolean isEnabled(
+ org.springframework.context.annotation.ConditionContext context,
+ java.lang.String prefix, boolean defaultValue) {
+ RelaxedPropertyResolver resolver = new RelaxedPropertyResolver(
+ context.getEnvironment(), prefix);
+ return resolver.getProperty("enabled", Boolean.class, defaultValue);
+ }
+ }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/LICENSE.txt
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/LICENSE.txt b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 0000000..6b0b127
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/NOTICE.txt
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/NOTICE.txt b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/NOTICE.txt
new file mode 100644
index 0000000..2e215bf
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/NOTICE.txt
@@ -0,0 +1,11 @@
+ =========================================================================
+ == NOTICE file corresponding to the section 4 d of ==
+ == the Apache License, Version 2.0, ==
+ == in this case for the Apache Camel distribution. ==
+ =========================================================================
+
+ This product includes software developed by
+ The Apache Software Foundation (http://www.apache.org/).
+
+ Please read the different LICENSE files present in the licenses directory of
+ this distribution.
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/additional-spring-configuration-metadata.json
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/additional-spring-configuration-metadata.json b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/additional-spring-configuration-metadata.json
new file mode 100644
index 0000000..01b4a88
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/additional-spring-configuration-metadata.json
@@ -0,0 +1,10 @@
+{
+ "properties": [
+ {
+ "defaultValue": true,
+ "name": "camel.component.tika.enabled",
+ "description": "Enable tika component",
+ "type": "java.lang.Boolean"
+ }
+ ]
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.factories
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.factories b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.factories
new file mode 100644
index 0000000..b45e85a
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.factories
@@ -0,0 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+org.springframework.boot.autoconfigure.EnableAutoConfiguration=\
+org.apache.camel.component.tika.springboot.TikaComponentAutoConfiguration
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.provides
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.provides b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.provides
new file mode 100644
index 0000000..bb3bb28
--- /dev/null
+++ b/platforms/spring-boot/components-starter/camel-tika-starter/src/main/resources/META-INF/spring.provides
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+provides: camel-tika
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/components-starter/pom.xml
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/components-starter/pom.xml b/platforms/spring-boot/components-starter/pom.xml
index b49dbf2..c6dc0e0 100644
--- a/platforms/spring-boot/components-starter/pom.xml
+++ b/platforms/spring-boot/components-starter/pom.xml
@@ -283,6 +283,7 @@
<module>camel-tagsoup-starter</module>
<module>camel-tarfile-starter</module>
<module>camel-telegram-starter</module>
+ <module>camel-tika-starter</module>
<module>camel-twitter-starter</module>
<module>camel-undertow-starter</module>
<module>camel-univocity-parsers-starter</module>
http://git-wip-us.apache.org/repos/asf/camel/blob/13a7c07d/platforms/spring-boot/spring-boot-dm/camel-spring-boot-dependencies/pom.xml
----------------------------------------------------------------------
diff --git a/platforms/spring-boot/spring-boot-dm/camel-spring-boot-dependencies/pom.xml b/platforms/spring-boot/spring-boot-dm/camel-spring-boot-dependencies/pom.xml
index d7367a22..ab4ab47 100644
--- a/platforms/spring-boot/spring-boot-dm/camel-spring-boot-dependencies/pom.xml
+++ b/platforms/spring-boot/spring-boot-dm/camel-spring-boot-dependencies/pom.xml
@@ -2474,6 +2474,11 @@
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
+ <artifactId>camel-tika</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
<artifactId>camel-twitter</artifactId>
<version>${project.version}</version>
</dependency>
[2/6] camel git commit: CAMEL-10740 - Initial work for camel-tika
component.
Posted by da...@apache.org.
CAMEL-10740 - Initial work for camel-tika component.
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/17c83bab
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/17c83bab
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/17c83bab
Branch: refs/heads/master
Commit: 17c83bab629b8c949919c8e1bbff6ba81c34fbba
Parents: 9c44f6a
Author: Bob Paulin <bo...@bobpaulin.com>
Authored: Tue Jan 24 17:42:10 2017 -0600
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:06:27 2017 +0100
----------------------------------------------------------------------
apache-camel/pom.xml | 4 +
.../src/main/descriptors/common-bin.xml | 1 +
components/camel-tika/pom.xml | 94 +++++++++
.../src/main/docs/tika-component.adoc | 96 +++++++++
.../camel/component/tika/TikaComponent.java | 45 ++++
.../camel/component/tika/TikaConfiguration.java | 100 +++++++++
.../camel/component/tika/TikaEndpoint.java | 61 ++++++
.../camel/component/tika/TikaOperation.java | 21 ++
.../component/tika/TikaParseOutputFormat.java | 21 ++
.../camel/component/tika/TikaProducer.java | 168 +++++++++++++++
.../src/main/resources/META-INF/LICENSE.txt | 203 +++++++++++++++++++
.../src/main/resources/META-INF/NOTICE.txt | 11 +
.../services/org/apache/camel/component/tika | 17 ++
.../camel/component/tika/TikaDetectTest.java | 85 ++++++++
.../camel/component/tika/TikaEmptyConfig.java | 33 +++
.../camel/component/tika/TikaParseTest.java | 141 +++++++++++++
.../camel-tika/src/test/resources/test.doc | Bin 0 -> 9216 bytes
.../camel-tika/src/test/resources/testGIF.gif | Bin 0 -> 8495 bytes
.../src/test/resources/tika-empty.xml | 22 ++
components/pom.xml | 1 +
components/readme.adoc | 3 +
docs/user-manual/en/SUMMARY.md | 1 +
parent/pom.xml | 6 +
.../features/src/main/resources/features.xml | 6 +
24 files changed, 1140 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/apache-camel/pom.xml
----------------------------------------------------------------------
diff --git a/apache-camel/pom.xml b/apache-camel/pom.xml
index eb80a32..74cfca9 100644
--- a/apache-camel/pom.xml
+++ b/apache-camel/pom.xml
@@ -966,6 +966,10 @@
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
+ <artifactId>camel-tika</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
<artifactId>camel-twitter</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/apache-camel/src/main/descriptors/common-bin.xml
----------------------------------------------------------------------
diff --git a/apache-camel/src/main/descriptors/common-bin.xml b/apache-camel/src/main/descriptors/common-bin.xml
index f487e42..534cc39 100644
--- a/apache-camel/src/main/descriptors/common-bin.xml
+++ b/apache-camel/src/main/descriptors/common-bin.xml
@@ -250,6 +250,7 @@
<include>org.apache.camel:camel-test-karaf</include>
<include>org.apache.camel:camel-test-spring</include>
<include>org.apache.camel:camel-testng</include>
+ <include>org.apache.camel:camel-tika</include>
<include>org.apache.camel:camel-twitter</include>
<include>org.apache.camel:camel-undertow</include>
<include>org.apache.camel:camel-univocity-parsers</include>
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/pom.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
new file mode 100644
index 0000000..86f0131
--- /dev/null
+++ b/components/camel-tika/pom.xml
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>components</artifactId>
+ <version>2.19.0-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>camel-tika</artifactId>
+ <packaging>jar</packaging>
+ <name>Camel :: Tika</name>
+ <description>This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
+
+ <properties>
+ <camel.osgi.export.pkg>org.apache.camel.component.tika.*</camel.osgi.export.pkg>
+ <camel.osgi.export.service>org.apache.camel.spi.ComponentResolver;component=tika</camel.osgi.export.service>
+ </properties>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${tika-version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>${tika-version}</version>
+ </dependency>
+ <!-- test dependencies -->
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-test-spring</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons-io-version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>java-hamcrest</artifactId>
+ <version>${hamcrest-version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/docs/tika-component.adoc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/docs/tika-component.adoc b/components/camel-tika/src/main/docs/tika-component.adoc
new file mode 100644
index 0000000..7049a59
--- /dev/null
+++ b/components/camel-tika/src/main/docs/tika-component.adoc
@@ -0,0 +1,96 @@
+## Tika Component
+### TIKA
+
+**Available as of Camel 2.19.0**
+
+The *Tika* component provides the ability to detect and parse documents with
+Apache Tika. This component uses
+https://tika.apache.org/[Apache Tika] as the underlying library to work
+with documents.
+
+In order to use the Tika component, Maven users will need to add the
+following dependency to their `pom.xml`:
+
+*pom.xml*
+
+[source,xml]
+------------------------------------------------------------
+<dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-tika</artifactId>
+ <version>x.x.x</version>
+ <!-- use the same version as your Camel core version -->
+</dependency>
+------------------------------------------------------------
+
+### URI format
+
+The TIKA component only supports producer endpoints.
+
+[source,java]
+-----------------------
+tika:operation[?options]
+-----------------------
+
+### Options
+
+// component options: START
+The Tika component has no options.
+// component options: END
+
+
+
+// endpoint options: START
+The Tika component supports 5 endpoint options which are listed below:
+
+{% raw %}
+[width="100%",cols="2,1,1m,1m,5",options="header"]
+|=======================================================================
+| Name | Group | Default | Java Type | Description
+| operation | producer | | TikaOperation | *Required* Tika Operation. parse or detect
+| tikaConfig | producer | | TikaConfig | Tika Config
+| tikaConfigUri | producer | | String | Tika Config Uri
+| tikaParseOutputFormat | producer | xml | TikaParseOutputFormat | Tika Output Format. Supported output formats are xml, html, text, textMain
+| synchronous | advanced | false | boolean | Sets whether synchronous processing should be strictly used or Camel is allowed to use asynchronous processing (if supported).
+|=======================================================================
+{% endraw %}
+// endpoint options: END
+
+
+### Headers
+[width="100%",cols="10%,90%",options="header",]
+|=======================================================================
+|Header |Description
+|TikaXXXX | Each Tika metadata entry is converted to a Camel header whose name is the metadata name prefixed with `Tika`
+|=======================================================================
+
+### To Detect a file's MIME Type
+
+The content of the file should be placed in the message body.
+
+[source,java]
+-------------------------------
+from("direct:start")
+ .to("tika:detect");
+-------------------------------
+
+### To Parse a File
+
+The content of the file should be placed in the message body.
+
+[source,java]
+-------------------------------
+from("direct:start")
+ .to("tika:parse");
+-------------------------------
+
+### See Also
+
+* link:configuring-camel.html[Configuring Camel]
+* link:component.html[Component]
+* link:endpoint.html[Endpoint]
+* link:getting-started.html[Getting Started]
+
+-
+
+-
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java
new file mode 100644
index 0000000..cb81e3c
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaComponent.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.net.URI;
+import java.util.Map;
+
+import org.apache.camel.Endpoint;
+import org.apache.camel.impl.UriEndpointComponent;
+import org.apache.tika.config.TikaConfig;
+
+public class TikaComponent extends UriEndpointComponent {
+
+ private static final String TIKA_CONFIG = "tikaConfig";
+
+ public TikaComponent() {
+ super(TikaEndpoint.class);
+ }
+
+ @Override
+ protected Endpoint createEndpoint(String uri, String remaining, Map<String, Object> parameters) throws Exception {
+ TikaConfiguration tikaConfiguration = new TikaConfiguration();
+ setProperties(tikaConfiguration, parameters);
+ TikaConfig config = resolveAndRemoveReferenceParameter(parameters, TIKA_CONFIG, TikaConfig.class);
+ if (config != null) {
+ tikaConfiguration.setTikaConfig(config);
+ }
+ tikaConfiguration.setOperation(new URI(uri).getHost());
+ return new TikaEndpoint(uri, this, tikaConfiguration);
+ }
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
new file mode 100644
index 0000000..051ad2a
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.IOException;
+
+import org.xml.sax.SAXException;
+
+import org.apache.camel.spi.Metadata;
+import org.apache.camel.spi.UriParam;
+import org.apache.camel.spi.UriParams;
+import org.apache.camel.spi.UriPath;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+
+
+@UriParams
+public class TikaConfiguration {
+
+ @UriPath(description = "Operation type")
+ @Metadata(required = "true")
+ private TikaOperation operation;
+ @UriParam(defaultValue = "xml")
+ private TikaParseOutputFormat tikaParseOutputFormat = TikaParseOutputFormat.xml;
+ @UriParam(description = "Tika Config")
+ private TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+ @UriParam(description = "Tika Config Url")
+ private String tikaConfigUri;
+
+ public TikaOperation getOperation() {
+ return operation;
+ }
+
+ /**
+ *
+ * Tika Operation. parse or detect
+ *
+ */
+ public void setOperation(TikaOperation operation) {
+ this.operation = operation;
+ }
+
+ public void setOperation(String operation) {
+ this.operation = TikaOperation.valueOf(operation);
+ }
+
+ public TikaParseOutputFormat getTikaParseOutputFormat() {
+ return tikaParseOutputFormat;
+ }
+
+ /**
+ *
+ * Tika Output Format. Supported output formats are xml, html, text, textMain
+ *
+ */
+ public void setTikaParseOutputFormat(TikaParseOutputFormat tikaParseOutputFormat) {
+ this.tikaParseOutputFormat = tikaParseOutputFormat;
+ }
+
+ public TikaConfig getTikaConfig() {
+ return tikaConfig;
+ }
+
+ /**
+ *
+ * Tika Config
+ *
+ */
+ public void setTikaConfig(TikaConfig tikaConfig) {
+ this.tikaConfig = tikaConfig;
+ }
+
+ public String getTikaConfigUri() {
+ return tikaConfigUri;
+ }
+
+ /**
+ *
+ * Tika Config Uri
+ *
+ */
+ public void setTikaConfigUri(String tikaConfigUri) throws TikaException, IOException, SAXException {
+ this.tikaConfigUri = tikaConfigUri;
+ this.tikaConfig = new TikaConfig(tikaConfigUri);
+ }
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
new file mode 100644
index 0000000..cb8fbdd
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import org.apache.camel.Component;
+import org.apache.camel.Consumer;
+import org.apache.camel.Processor;
+import org.apache.camel.Producer;
+import org.apache.camel.impl.DefaultEndpoint;
+import org.apache.camel.spi.UriEndpoint;
+import org.apache.camel.spi.UriParam;
+
+@UriEndpoint(scheme = "tika", title = "Tika", syntax = "tika:operation", producerOnly = true, label = "tika")
+public class TikaEndpoint extends DefaultEndpoint {
+
+ @UriParam
+ private TikaConfiguration tikaConfiguration;
+
+ public TikaEndpoint(String endpointUri, Component component, TikaConfiguration tikaConfiguration) {
+ super(endpointUri, component);
+ this.tikaConfiguration = tikaConfiguration;
+ }
+
+ @Override
+ public Producer createProducer() throws Exception {
+ return new TikaProducer(this);
+ }
+
+ @Override
+ public Consumer createConsumer(Processor processor) throws Exception {
+ throw new UnsupportedOperationException("Consumer does not supported for Tika component:" + getEndpointUri());
+ }
+
+ @Override
+ public boolean isSingleton() {
+ return true;
+ }
+
+ public TikaConfiguration getTikaConfiguration() {
+ return tikaConfiguration;
+ }
+
+ public void setTikaConfiguration(TikaConfiguration tikaConfiguration) {
+ this.tikaConfiguration = tikaConfiguration;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java
new file mode 100644
index 0000000..912387d
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaOperation.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+public enum TikaOperation {
+ parse, detect;
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
new file mode 100644
index 0000000..82005be
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaParseOutputFormat.java
@@ -0,0 +1,21 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+public enum TikaParseOutputFormat {
+ xml, html, text, textMain;
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
new file mode 100644
index 0000000..1e0d9ca
--- /dev/null
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -0,0 +1,168 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
+
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.sax.SAXTransformerFactory;
+import javax.xml.transform.sax.TransformerHandler;
+import javax.xml.transform.stream.StreamResult;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import org.apache.camel.Exchange;
+import org.apache.camel.impl.DefaultProducer;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.html.BoilerpipeContentHandler;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.ExpandedTitleContentHandler;
+
+public class TikaProducer extends DefaultProducer {
+
+ private final TikaConfiguration tikaConfiguration;
+
+ private final Parser parser;
+
+ private final Detector detector;
+
+ public TikaProducer(TikaEndpoint endpoint) {
+ super(endpoint);
+ this.tikaConfiguration = endpoint.getTikaConfiguration();
+ TikaConfig config = this.tikaConfiguration.getTikaConfig();
+ this.parser = new AutoDetectParser(config);
+ this.detector = config.getDetector();
+ }
+
+ @Override
+ public void process(Exchange exchange) throws Exception {
+ TikaOperation operation = this.tikaConfiguration.getOperation();
+ Object result;
+ switch (operation) {
+ case detect:
+ result = doDetect(exchange);
+ break;
+ case parse:
+ result = doParse(exchange);
+ break;
+ default:
+ throw new IllegalArgumentException(String.format("Unknown operation %s", tikaConfiguration.getOperation()));
+ }
+ // propagate headers
+ exchange.getOut().setHeaders(exchange.getIn().getHeaders());
+ exchange.getOut().setAttachments(exchange.getIn().getAttachments());
+ // and set result
+ exchange.getOut().setBody(result);
+ }
+
+ private Object doDetect(Exchange exchange) throws IOException {
+ InputStream inputStream = exchange.getIn().getBody(InputStream.class);
+ Metadata metadata = new Metadata();
+ MediaType result = this.detector.detect(inputStream, metadata);
+ convertMetadataToHeaders(metadata, exchange);
+ return result.toString();
+ }
+
+ private Object doParse(Exchange exchange)
+ throws TikaException, IOException, SAXException, TransformerConfigurationException {
+ InputStream inputStream = exchange.getIn().getBody(InputStream.class);
+ OutputStream result = new ByteArrayOutputStream();
+ ContentHandler contentHandler = getContentHandler(this.tikaConfiguration, result);
+ ParseContext context = new ParseContext();
+ context.set(Parser.class, this.parser);
+ Metadata metadata = new Metadata();
+ this.parser.parse(inputStream, contentHandler, metadata, context);
+ convertMetadataToHeaders(metadata, exchange);
+ return result;
+ }
+
+ private void convertMetadataToHeaders(Metadata metadata, Exchange exchange) {
+ if (metadata != null) {
+ for (String metaname : metadata.names()) {
+ exchange.getIn().setHeader("Tika" + metaname, metadata.get(metaname));
+ }
+ }
+ }
+
+ private ContentHandler getContentHandler(TikaConfiguration configuration, OutputStream outputStream)
+ throws TransformerConfigurationException, UnsupportedEncodingException {
+
+ ContentHandler result = null;
+
+ TikaParseOutputFormat outputFormat = configuration.getTikaParseOutputFormat();
+ String encoding = Charset.defaultCharset().name();
+ switch (outputFormat) {
+ case xml:
+ result = getTransformerHandler(outputStream, "xml", encoding, true);
+ break;
+ case text:
+ result = new BodyContentHandler(outputStream);
+ break;
+ case textMain:
+ result = new BoilerpipeContentHandler(getOutputWriter(outputStream, encoding));
+ break;
+ case html:
+ result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", encoding, true));
+ break;
+ default:
+ throw new IllegalArgumentException(
+ String.format("Unknown format %s", tikaConfiguration.getTikaParseOutputFormat()));
+ }
+ return result;
+ }
+
+ private TransformerHandler getTransformerHandler(OutputStream output, String method, String encoding,
+ boolean prettyPrint) throws TransformerConfigurationException {
+ SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
+ TransformerHandler handler = factory.newTransformerHandler();
+ handler.getTransformer().setOutputProperty(OutputKeys.METHOD, method);
+ handler.getTransformer().setOutputProperty(OutputKeys.INDENT, prettyPrint ? "yes" : "no");
+ if (encoding != null) {
+ handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, encoding);
+ }
+ handler.setResult(new StreamResult(output));
+ return handler;
+ }
+
+ private Writer getOutputWriter(OutputStream output, String encoding) throws UnsupportedEncodingException {
+ if (encoding != null) {
+ return new OutputStreamWriter(output, encoding);
+ } else if (System.getProperty("os.name").toLowerCase(Locale.ROOT).startsWith("mac os x")) {
+ return new OutputStreamWriter(output, StandardCharsets.UTF_8);
+ } else {
+ return new OutputStreamWriter(output, Charset.defaultCharset());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/resources/META-INF/LICENSE.txt
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/resources/META-INF/LICENSE.txt b/components/camel-tika/src/main/resources/META-INF/LICENSE.txt
new file mode 100644
index 0000000..43e91eb
--- /dev/null
+++ b/components/camel-tika/src/main/resources/META-INF/LICENSE.txt
@@ -0,0 +1,203 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/resources/META-INF/NOTICE.txt
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/resources/META-INF/NOTICE.txt b/components/camel-tika/src/main/resources/META-INF/NOTICE.txt
new file mode 100644
index 0000000..455e6a5
--- /dev/null
+++ b/components/camel-tika/src/main/resources/META-INF/NOTICE.txt
@@ -0,0 +1,11 @@
+ =========================================================================
+ == NOTICE file corresponding to the section 4 d of ==
+ == the Apache License, Version 2.0, ==
+ == in this case for the Apache Camel distribution. ==
+ =========================================================================
+
+ This product includes software developed by
+ The Apache Software Foundation (http://www.apache.org/).
+
+ Please read the different LICENSE files present in the licenses directory of
+ this distribution.
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika b/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika
new file mode 100644
index 0000000..1a07241
--- /dev/null
+++ b/components/camel-tika/src/main/resources/META-INF/services/org/apache/camel/component/tika
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+class=org.apache.camel.component.tika.TikaComponent
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java
new file mode 100644
index 0000000..7ca1ed1
--- /dev/null
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaDetectTest.java
@@ -0,0 +1,85 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.File;
+
+import org.apache.camel.EndpointInject;
+import org.apache.camel.Exchange;
+import org.apache.camel.Predicate;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.camel.test.junit4.CamelTestSupport;
+
+import org.junit.Test;
+
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.instanceOf;
+
+/**
+ * Sends sample files through the {@code tika:detect} endpoint and checks that the
+ * resulting message body contains the expected media type.
+ */
+public class TikaDetectTest extends CamelTestSupport {
+
+    @EndpointInject(uri = "mock:result")
+    protected MockEndpoint resultEndpoint;
+
+    @Test
+    public void testDocumentDetect() throws Exception {
+        sendAndExpectBodyContaining("src/test/resources/test.doc", "application/x-tika-msoffice");
+    }
+
+    @Test
+    public void testImageDetect() throws Exception {
+        sendAndExpectBodyContaining("src/test/resources/testGIF.gif", "image/gif");
+    }
+
+    /**
+     * Sends the file at {@code path} to {@code direct:start} and asserts that exactly one
+     * message reaches the mock endpoint with a String body containing {@code expectedType}.
+     */
+    private void sendAndExpectBodyContaining(String path, final String expectedType) throws Exception {
+        template.sendBody("direct:start", new File(path));
+
+        resultEndpoint.setExpectedMessageCount(1);
+        resultEndpoint.expectedMessagesMatches(new Predicate() {
+            @Override
+            public boolean matches(Exchange exchange) {
+                Object payload = exchange.getIn().getBody(String.class);
+                assertThat(payload, instanceOf(String.class));
+                assertThat((String) payload, containsString(expectedType));
+                // The assertions above throw on mismatch, so reaching this point means a match.
+                return true;
+            }
+        });
+        resultEndpoint.assertIsSatisfied();
+    }
+
+    @Override
+    protected RouteBuilder createRouteBuilder() throws Exception {
+        return new RouteBuilder() {
+            @Override
+            public void configure() throws Exception {
+                from("direct:start").to("tika:detect").to("mock:result");
+            }
+        };
+    }
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java
new file mode 100644
index 0000000..c30968f
--- /dev/null
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaEmptyConfig.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.xml.sax.SAXException;
+
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Test-only {@link TikaConfig} that is always loaded from
+ * {@code src/test/resources/tika-empty.xml}, so it can be created with a
+ * no-argument constructor.
+ */
+public class TikaEmptyConfig extends TikaConfig {
+
+    /** Location of the Tika configuration file, relative to the working directory. */
+    private static final String EMPTY_CONFIG_PATH = "src/test/resources/tika-empty.xml";
+
+    /**
+     * Loads the configuration from the fixed test resource.
+     *
+     * @throws TikaException declared by the {@link TikaConfig} constructor
+     * @throws IOException   declared by the {@link TikaConfig} constructor
+     * @throws SAXException  declared by the {@link TikaConfig} constructor
+     */
+    public TikaEmptyConfig() throws TikaException, IOException, SAXException {
+        super(new File(EMPTY_CONFIG_PATH));
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
new file mode 100644
index 0000000..dc6d97e
--- /dev/null
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.camel.component.tika;
+
+import java.io.File;
+import java.util.Map;
+
+import org.apache.camel.EndpointInject;
+import org.apache.camel.Exchange;
+import org.apache.camel.Predicate;
+import org.apache.camel.builder.RouteBuilder;
+import org.apache.camel.component.mock.MockEndpoint;
+import org.apache.camel.impl.JndiRegistry;
+import org.apache.camel.test.junit4.CamelTestSupport;
+import org.junit.Test;
+import static org.hamcrest.Matchers.*;
+
+public class TikaParseTest extends CamelTestSupport {
+
+ @EndpointInject(uri = "mock:result")
+ protected MockEndpoint resultEndpoint;
+
+ @Test
+ public void testDocumentParse() throws Exception {
+
+ File document = new File("src/test/resources/test.doc");
+ template.sendBody("direct:start", document);
+
+ resultEndpoint.setExpectedMessageCount(1);
+
+ resultEndpoint.expectedMessagesMatches(new Predicate() {
+ @Override
+ public boolean matches(Exchange exchange) {
+ Object body = exchange.getIn().getBody(String.class);
+ Map<String, Object> headerMap = exchange.getIn().getHeaders();
+ assertThat(body, instanceOf(String.class));
+ assertThat((String) body, containsString("test"));
+ assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+ return true;
+ }
+ });
+ resultEndpoint.assertIsSatisfied();
+ }
+
+ @Test
+ public void testImageParse() throws Exception {
+ File document = new File("src/test/resources/testGIF.gif");
+ template.sendBody("direct:start", document);
+
+ resultEndpoint.setExpectedMessageCount(1);
+
+ resultEndpoint.expectedMessagesMatches(new Predicate() {
+ @Override
+ public boolean matches(Exchange exchange) {
+ Object body = exchange.getIn().getBody(String.class);
+ Map<String, Object> headerMap = exchange.getIn().getHeaders();
+ assertThat(body, instanceOf(String.class));
+ assertThat((String) body, containsString("<body/>"));
+ assertThat(headerMap.get("TikaContent-Type"), equalTo("image/gif"));
+ return true;
+ }
+ });
+ resultEndpoint.assertIsSatisfied();
+ }
+
+ @Test
+ public void testEmptyConfigDocumentParse() throws Exception {
+ File document = new File("src/test/resources/test.doc");
+ template.sendBody("direct:start3", document);
+
+ resultEndpoint.setExpectedMessageCount(1);
+
+ resultEndpoint.expectedMessagesMatches(new Predicate() {
+ @Override
+ public boolean matches(Exchange exchange) {
+ Object body = exchange.getIn().getBody(String.class);
+ Map<String, Object> headerMap = exchange.getIn().getHeaders();
+ assertThat(body, instanceOf(String.class));
+ assertThat((String) body, containsString("<body/>"));
+ assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+ return true;
+ }
+ });
+ resultEndpoint.assertIsSatisfied();
+ }
+
+ @Test
+ public void testRegistryConfigDocumentParse() throws Exception {
+ File document = new File("src/test/resources/test.doc");
+ template.sendBody("direct:start3", document);
+
+ resultEndpoint.setExpectedMessageCount(1);
+
+ resultEndpoint.expectedMessagesMatches(new Predicate() {
+ @Override
+ public boolean matches(Exchange exchange) {
+ Object body = exchange.getIn().getBody(String.class);
+ Map<String, Object> headerMap = exchange.getIn().getHeaders();
+ assertThat(body, instanceOf(String.class));
+ assertThat((String) body, containsString("<body/>"));
+ assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+ return true;
+ }
+ });
+ resultEndpoint.assertIsSatisfied();
+ }
+
+ @Override
+ protected RouteBuilder createRouteBuilder() throws Exception {
+ return new RouteBuilder() {
+ @Override
+ public void configure() throws Exception {
+ from("direct:start").to("tika:parse").to("mock:result");
+ from("direct:start2").to("tika:parse?tikaConfigUri=src/test/resources/tika-empty.xml")
+ .to("mock:result");
+ from("direct:start3").to("tika:parse?tikaConfig=#testConfig").to("mock:result");
+ }
+ };
+ }
+
+ @Override
+ protected JndiRegistry createRegistry() throws Exception {
+ JndiRegistry reg = super.createRegistry();
+ reg.bind("testConfig", new TikaEmptyConfig());
+ return reg;
+ }
+}
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/resources/test.doc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/test.doc b/components/camel-tika/src/test/resources/test.doc
new file mode 100644
index 0000000..93198c8
Binary files /dev/null and b/components/camel-tika/src/test/resources/test.doc differ
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/resources/testGIF.gif
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/testGIF.gif b/components/camel-tika/src/test/resources/testGIF.gif
new file mode 100644
index 0000000..e09e641
Binary files /dev/null and b/components/camel-tika/src/test/resources/testGIF.gif differ
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/camel-tika/src/test/resources/tika-empty.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/tika-empty.xml b/components/camel-tika/src/test/resources/tika-empty.xml
new file mode 100644
index 0000000..56504c4
--- /dev/null
+++ b/components/camel-tika/src/test/resources/tika-empty.xml
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <parsers>
+ <parser class="org.apache.tika.parser.EmptyParser"/>
+ </parsers>
+</properties>
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/pom.xml
----------------------------------------------------------------------
diff --git a/components/pom.xml b/components/pom.xml
index 4276240..90b092e 100644
--- a/components/pom.xml
+++ b/components/pom.xml
@@ -265,6 +265,7 @@
<module>camel-tagsoup</module>
<module>camel-tarfile</module>
<module>camel-telegram</module>
+ <module>camel-tika</module>
<module>camel-twitter</module>
<module>camel-undertow</module>
<module>camel-univocity-parsers</module>
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/components/readme.adoc
----------------------------------------------------------------------
diff --git a/components/readme.adoc b/components/readme.adoc
index 190f8ea..fccd05c 100644
--- a/components/readme.adoc
+++ b/components/readme.adoc
@@ -543,6 +543,9 @@ Components
| link:camel-telegram/src/main/docs/telegram-component.adoc[Telegram] (camel-telegram) +
`telegram:type/authorizationToken` | The telegram component provides access to the Telegram Bot API.
+| link:camel-tika/src/main/docs/tika-component.adoc[Tika] (camel-tika) +
+`tika:operation` | This component integrates with Apache Tika to extract content and metadata from thousands of file types.
+
| link:camel-twitter/src/main/docs/twitter-component.adoc[Twitter] (camel-twitter) +
`twitter:kind` | This component integrates with Twitter to send tweets or search for tweets and more.
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/docs/user-manual/en/SUMMARY.md
----------------------------------------------------------------------
diff --git a/docs/user-manual/en/SUMMARY.md b/docs/user-manual/en/SUMMARY.md
index b7dd18c..d93d575 100644
--- a/docs/user-manual/en/SUMMARY.md
+++ b/docs/user-manual/en/SUMMARY.md
@@ -303,6 +303,7 @@
* [Stream](stream-component.adoc)
* [String Template](string-template-component.adoc)
* [Telegram](telegram-component.adoc)
+ * [Tika](tika-component.adoc)
* [Twitter](twitter-component.adoc)
* [Undertow](undertow-component.adoc)
* [Velocity](velocity-component.adoc)
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/parent/pom.xml
----------------------------------------------------------------------
diff --git a/parent/pom.xml b/parent/pom.xml
index a3692a7..0d9fe43 100644
--- a/parent/pom.xml
+++ b/parent/pom.xml
@@ -614,6 +614,7 @@
<tagsoup-version>1.2.1</tagsoup-version>
<testng-version>6.8.21</testng-version>
<tinybundles-version>2.1.1</tinybundles-version>
+ <tika-version>1.14</tika-version>
<twitter4j-bundle-version>4.0.6_1</twitter4j-bundle-version>
<twitter4j-version>4.0.6</twitter4j-version>
<undertow-version>1.4.8.Final</undertow-version>
@@ -1896,6 +1897,11 @@
</dependency>
<dependency>
<groupId>org.apache.camel</groupId>
+ <artifactId>camel-tika</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
<artifactId>camel-twitter</artifactId>
<version>${project.version}</version>
</dependency>
http://git-wip-us.apache.org/repos/asf/camel/blob/17c83bab/platforms/karaf/features/src/main/resources/features.xml
----------------------------------------------------------------------
diff --git a/platforms/karaf/features/src/main/resources/features.xml b/platforms/karaf/features/src/main/resources/features.xml
index 0993d46..20d52f5 100644
--- a/platforms/karaf/features/src/main/resources/features.xml
+++ b/platforms/karaf/features/src/main/resources/features.xml
@@ -1881,6 +1881,12 @@
<bundle>mvn:org.apache.camel/camel-test/${project.version}</bundle>
<bundle>mvn:org.apache.camel/camel-test-spring/${project.version}</bundle>
</feature>
+ <feature name='camel-tika' version='${project.version}' resolver='(obr)' start-level='50'>
+ <bundle dependency='true'>mvn:org.apache.tika/tika-core/${tika-version}</bundle>
+ <bundle dependency='true'>mvn:org.apache.tika/tika-bundle/${tika-version}</bundle>
+ <feature version='${project.version}'>camel-core</feature>
+ <bundle>mvn:org.apache.camel/camel-tika/${project.version}</bundle>
+ </feature>
<feature name='camel-twitter' version='${project.version}' resolver='(obr)' start-level='50'>
<feature version='${project.version}'>camel-core</feature>
<bundle dependency='true'>mvn:org.apache.servicemix.bundles/org.apache.servicemix.bundles.twitter4j/${twitter4j-bundle-version}</bundle>
[3/6] camel git commit: CAMEL-10740 - Code cleanup and encoding
support.
Posted by da...@apache.org.
CAMEL-10740 - Code cleanup and encoding support.
Project: http://git-wip-us.apache.org/repos/asf/camel/repo
Commit: http://git-wip-us.apache.org/repos/asf/camel/commit/c73068e7
Tree: http://git-wip-us.apache.org/repos/asf/camel/tree/c73068e7
Diff: http://git-wip-us.apache.org/repos/asf/camel/diff/c73068e7
Branch: refs/heads/master
Commit: c73068e7d42f5f8a83b218463389383d6fb26837
Parents: 17c83ba
Author: Bob Paulin <bo...@bobpaulin.com>
Authored: Sat Jan 28 23:58:12 2017 -0600
Committer: Claus Ibsen <da...@apache.org>
Committed: Sun Jan 29 17:06:27 2017 +0100
----------------------------------------------------------------------
components/camel-tika/pom.xml | 141 +++++++++----------
.../src/main/docs/tika-component.adoc | 8 +-
.../camel/component/tika/TikaConfiguration.java | 26 +++-
.../camel/component/tika/TikaEndpoint.java | 2 +-
.../camel/component/tika/TikaProducer.java | 38 ++---
.../camel/component/tika/TikaParseTest.java | 67 ++++++++-
.../src/test/resources/testOpenOffice2.odt | Bin 0 -> 26460 bytes
7 files changed, 175 insertions(+), 107 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/pom.xml
----------------------------------------------------------------------
diff --git a/components/camel-tika/pom.xml b/components/camel-tika/pom.xml
index 86f0131..6233b9f 100644
--- a/components/camel-tika/pom.xml
+++ b/components/camel-tika/pom.xml
@@ -15,80 +15,79 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>components</artifactId>
+ <version>2.19.0-SNAPSHOT</version>
+ </parent>
- <parent>
- <groupId>org.apache.camel</groupId>
- <artifactId>components</artifactId>
- <version>2.19.0-SNAPSHOT</version>
- </parent>
+ <artifactId>camel-tika</artifactId>
+ <packaging>jar</packaging>
+ <name>Camel :: Tika</name>
+ <description>This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
- <artifactId>camel-tika</artifactId>
- <packaging>jar</packaging>
- <name>Camel :: Tika</name>
- <description>This component integrates with Apache Tika to extract content and metadata from thousands of file types.</description>
+ <properties>
+ <camel.osgi.export.pkg>org.apache.camel.component.tika.*</camel.osgi.export.pkg>
+ <camel.osgi.export.service>org.apache.camel.spi.ComponentResolver;component=tika</camel.osgi.export.service>
+ </properties>
- <properties>
- <camel.osgi.export.pkg>org.apache.camel.component.tika.*</camel.osgi.export.pkg>
- <camel.osgi.export.service>org.apache.camel.spi.ComponentResolver;component=tika</camel.osgi.export.service>
- </properties>
-
- <dependencies>
-
- <dependency>
- <groupId>org.apache.camel</groupId>
- <artifactId>camel-core</artifactId>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- <version>${tika-version}</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
- <artifactId>tika-parsers</artifactId>
- <version>${tika-version}</version>
- </dependency>
- <!-- test dependencies -->
- <dependency>
- <groupId>org.apache.camel</groupId>
- <artifactId>camel-test-spring</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-api</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-core</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.logging.log4j</groupId>
- <artifactId>log4j-slf4j-impl</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>commons-io</groupId>
- <artifactId>commons-io</artifactId>
- <version>${commons-io-version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.hamcrest</groupId>
- <artifactId>java-hamcrest</artifactId>
- <version>${hamcrest-version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${tika-version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>${tika-version}</version>
+ </dependency>
+ <!-- test dependencies -->
+ <dependency>
+ <groupId>org.apache.camel</groupId>
+ <artifactId>camel-test-spring</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-api</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-core</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.logging.log4j</groupId>
+ <artifactId>log4j-slf4j-impl</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons-io-version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>java-hamcrest</artifactId>
+ <version>${hamcrest-version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/docs/tika-component.adoc
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/docs/tika-component.adoc b/components/camel-tika/src/main/docs/tika-component.adoc
index 7049a59..f077452 100644
--- a/components/camel-tika/src/main/docs/tika-component.adoc
+++ b/components/camel-tika/src/main/docs/tika-component.adoc
@@ -41,7 +41,7 @@ The Tika component has no options.
// endpoint options: START
-The Tika component supports 5 endpoint options which are listed below:
+The Tika component supports 6 endpoint options which are listed below:
{% raw %}
[width="100%",cols="2,1,1m,1m,5",options="header"]
@@ -49,8 +49,9 @@ The Tika component supports 5 endpoint options which are listed below:
| Name | Group | Default | Java Type | Description
| operation | producer | | TikaOperation | *Required* Tika Operation. parse or detect
| tikaConfig | producer | | TikaConfig | Tika Config
-| tikaConfigUri | producer | | String | Tika Config Uri
-| tikaParseOutputFormat | producer | xml | TikaParseOutputFormat | Tika Output Format. Supported output formats are xml html text textMain
+| tikaConfigUri | producer | | String | Tika Config Uri: The URI of tika-config.xml
+| tikaParseOutputEncoding | producer | | String | Tika Parse Output Encoding - Used to specify the character encoding of the parsed output. Defaults to Charset.defaultCharset().
+| tikaParseOutputFormat | producer | xml | TikaParseOutputFormat | Tika Output Format. Supported output formats. xml: Returns Parsed Content as XML. html: Returns Parsed Content as HTML. text: Returns Parsed Content as Text. textMain: Uses the boilerpipe library to automatically extract the main content from a web page.
| synchronous | advanced | false | boolean | Sets whether synchronous processing should be strictly used or Camel is allowed to use asynchronous processing (if supported).
|=======================================================================
{% endraw %}
@@ -61,7 +62,6 @@ The Tika component supports 5 endpoint options which are listed below:
[width="100%",cols="10%,90%",options="header",]
|=======================================================================
|Header |Description
-|TikaXXXX | Any Tika Metadata Header is converted to a Camel Header with Prefix Tika
|=======================================================================
### To Detect a file's MIME Type
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
index 051ad2a..33542c0 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaConfiguration.java
@@ -17,6 +17,7 @@
package org.apache.camel.component.tika;
import java.io.IOException;
+import java.nio.charset.Charset;
import org.xml.sax.SAXException;
@@ -36,6 +37,8 @@ public class TikaConfiguration {
private TikaOperation operation;
@UriParam(defaultValue = "xml")
private TikaParseOutputFormat tikaParseOutputFormat = TikaParseOutputFormat.xml;
+ @UriParam(description = "Tika Parse Output Encoding")
+ private String tikaParseOutputEncoding = Charset.defaultCharset().name();
@UriParam(description = "Tika Config")
private TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
@UriParam(description = "Tika Config Url")
@@ -64,12 +67,31 @@ public class TikaConfiguration {
/**
*
- * Tika Output Format. Supported output formats are xml, html, text, textMain
+ * Tika Output Format. Supported output formats.
+ * <ul>
+ * <li>xml: Returns Parsed Content as XML. </li>
+ * <li>html: Returns Parsed Content as HTML. </li>
+ * <li>text: Returns Parsed Content as Text. </li>
+ * <li>textMain: Uses the <a href="http://code.google.com/p/boilerpipe/">boilerpipe</a> library to automatically extract the main content from a web page. </li>
+ * </ul>
*
*/
public void setTikaParseOutputFormat(TikaParseOutputFormat tikaParseOutputFormat) {
this.tikaParseOutputFormat = tikaParseOutputFormat;
}
+
+ public String getTikaParseOutputEncoding() {
+ return tikaParseOutputEncoding;
+ }
+
+ /**
+ * Tika Parse Output Encoding - Used to specify the character encoding of the parsed output.
+ * Defaults to Charset.defaultCharset().
+ *
+ */
+ public void setTikaParseOutputEncoding(String tikaParseOutputEncoding) {
+ this.tikaParseOutputEncoding = tikaParseOutputEncoding;
+ }
public TikaConfig getTikaConfig() {
return tikaConfig;
@@ -90,7 +112,7 @@ public class TikaConfiguration {
/**
*
- * Tika Config Uri
+ * Tika Config Uri: The URI of tika-config.xml
*
*/
public void setTikaConfigUri(String tikaConfigUri) throws TikaException, IOException, SAXException {
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
index cb8fbdd..a1701d3 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaEndpoint.java
@@ -24,7 +24,7 @@ import org.apache.camel.impl.DefaultEndpoint;
import org.apache.camel.spi.UriEndpoint;
import org.apache.camel.spi.UriParam;
-@UriEndpoint(scheme = "tika", title = "Tika", syntax = "tika:operation", producerOnly = true, label = "tika")
+@UriEndpoint(scheme = "tika", title = "Tika", syntax = "tika:operation", producerOnly = true, label = "transformation")
public class TikaEndpoint extends DefaultEndpoint {
@UriParam
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
index 1e0d9ca..309df98 100644
--- a/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
+++ b/components/camel-tika/src/main/java/org/apache/camel/component/tika/TikaProducer.java
@@ -22,10 +22,6 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
-import java.io.Writer;
-import java.nio.charset.Charset;
-import java.nio.charset.StandardCharsets;
-import java.util.Locale;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerConfigurationException;
@@ -57,10 +53,13 @@ public class TikaProducer extends DefaultProducer {
private final Parser parser;
private final Detector detector;
+
+ private final String encoding;
public TikaProducer(TikaEndpoint endpoint) {
super(endpoint);
this.tikaConfiguration = endpoint.getTikaConfiguration();
+ this.encoding = this.tikaConfiguration.getTikaParseOutputEncoding();
TikaConfig config = this.tikaConfiguration.getTikaConfig();
this.parser = new AutoDetectParser(config);
this.detector = config.getDetector();
@@ -111,7 +110,7 @@ public class TikaProducer extends DefaultProducer {
private void convertMetadataToHeaders(Metadata metadata, Exchange exchange) {
if (metadata != null) {
for (String metaname : metadata.names()) {
- exchange.getIn().setHeader("Tika" + metaname, metadata.get(metaname));
+ exchange.getIn().setHeader(metaname, metadata.get(metaname));
}
}
}
@@ -122,19 +121,18 @@ public class TikaProducer extends DefaultProducer {
ContentHandler result = null;
TikaParseOutputFormat outputFormat = configuration.getTikaParseOutputFormat();
- String encoding = Charset.defaultCharset().name();
switch (outputFormat) {
case xml:
- result = getTransformerHandler(outputStream, "xml", encoding, true);
+ result = getTransformerHandler(outputStream, "xml", true);
break;
case text:
- result = new BodyContentHandler(outputStream);
+ result = new BodyContentHandler(new OutputStreamWriter(outputStream, this.encoding));
break;
case textMain:
- result = new BoilerpipeContentHandler(getOutputWriter(outputStream, encoding));
+ result = new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, this.encoding));
break;
case html:
- result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", encoding, true));
+ result = new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
break;
default:
throw new IllegalArgumentException(
@@ -143,26 +141,16 @@ public class TikaProducer extends DefaultProducer {
return result;
}
- private TransformerHandler getTransformerHandler(OutputStream output, String method, String encoding,
- boolean prettyPrint) throws TransformerConfigurationException {
+ private TransformerHandler getTransformerHandler(OutputStream output, String method,
+ boolean prettyPrint) throws TransformerConfigurationException, UnsupportedEncodingException {
SAXTransformerFactory factory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
TransformerHandler handler = factory.newTransformerHandler();
handler.getTransformer().setOutputProperty(OutputKeys.METHOD, method);
handler.getTransformer().setOutputProperty(OutputKeys.INDENT, prettyPrint ? "yes" : "no");
- if (encoding != null) {
- handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, encoding);
+ if (this.encoding != null) {
+ handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, this.encoding);
}
- handler.setResult(new StreamResult(output));
+ handler.setResult(new StreamResult(new OutputStreamWriter(output, this.encoding)));
return handler;
}
-
- private Writer getOutputWriter(OutputStream output, String encoding) throws UnsupportedEncodingException {
- if (encoding != null) {
- return new OutputStreamWriter(output, encoding);
- } else if (System.getProperty("os.name").toLowerCase(Locale.ROOT).startsWith("mac os x")) {
- return new OutputStreamWriter(output, StandardCharsets.UTF_8);
- } else {
- return new OutputStreamWriter(output, Charset.defaultCharset());
- }
- }
}
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
index dc6d97e..1db2a8d 100644
--- a/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
+++ b/components/camel-tika/src/test/java/org/apache/camel/component/tika/TikaParseTest.java
@@ -16,7 +16,15 @@
*/
package org.apache.camel.component.tika;
+import java.io.ByteArrayInputStream;
import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.camel.EndpointInject;
@@ -26,7 +34,11 @@ import org.apache.camel.builder.RouteBuilder;
import org.apache.camel.component.mock.MockEndpoint;
import org.apache.camel.impl.JndiRegistry;
import org.apache.camel.test.junit4.CamelTestSupport;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.txt.UniversalEncodingDetector;
import org.junit.Test;
+import org.mozilla.universalchardet.UniversalDetector;
+
import static org.hamcrest.Matchers.*;
public class TikaParseTest extends CamelTestSupport {
@@ -48,8 +60,54 @@ public class TikaParseTest extends CamelTestSupport {
Object body = exchange.getIn().getBody(String.class);
Map<String, Object> headerMap = exchange.getIn().getHeaders();
assertThat(body, instanceOf(String.class));
+
+ Charset detectedCharset = null;
+ try {
+ InputStream bodyIs = new ByteArrayInputStream(((String)body).getBytes());
+ UniversalEncodingDetector encodingDetector = new UniversalEncodingDetector();
+ detectedCharset = encodingDetector.detect(bodyIs, new Metadata());
+ } catch (IOException e1) {
+ fail();
+ }
+
+
+ assertThat(detectedCharset.name(), startsWith(Charset.defaultCharset().name()));
+
assertThat((String) body, containsString("test"));
- assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+ assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/msword"));
+ return true;
+ }
+ });
+ resultEndpoint.assertIsSatisfied();
+ }
+
+ @Test
+ public void testDocumentParseWithEncoding() throws Exception {
+
+ File document = new File("src/test/resources/testOpenOffice2.odt");
+ template.sendBody("direct:start4", document);
+
+ resultEndpoint.setExpectedMessageCount(1);
+
+ resultEndpoint.expectedMessagesMatches(new Predicate() {
+ @Override
+ public boolean matches(Exchange exchange) {
+ Object body = exchange.getIn().getBody(String.class);
+ Map<String, Object> headerMap = exchange.getIn().getHeaders();
+ assertThat(body, instanceOf(String.class));
+
+ Charset detectedCharset = null;
+ try {
+ InputStream bodyIs = new ByteArrayInputStream(((String)body).getBytes(StandardCharsets.UTF_16));
+ UniversalEncodingDetector encodingDetector = new UniversalEncodingDetector();
+ detectedCharset = encodingDetector.detect(bodyIs, new Metadata());
+ } catch (IOException e1) {
+ fail();
+ }
+
+
+ assertThat(detectedCharset.name(), startsWith(StandardCharsets.UTF_16.name()));
+ assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/vnd.oasis.opendocument.text"));
return true;
}
});
@@ -70,7 +128,7 @@ public class TikaParseTest extends CamelTestSupport {
Map<String, Object> headerMap = exchange.getIn().getHeaders();
assertThat(body, instanceOf(String.class));
assertThat((String) body, containsString("<body/>"));
- assertThat(headerMap.get("TikaContent-Type"), equalTo("image/gif"));
+ assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("image/gif"));
return true;
}
});
@@ -91,7 +149,7 @@ public class TikaParseTest extends CamelTestSupport {
Map<String, Object> headerMap = exchange.getIn().getHeaders();
assertThat(body, instanceOf(String.class));
assertThat((String) body, containsString("<body/>"));
- assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+ assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/msword"));
return true;
}
});
@@ -112,7 +170,7 @@ public class TikaParseTest extends CamelTestSupport {
Map<String, Object> headerMap = exchange.getIn().getHeaders();
assertThat(body, instanceOf(String.class));
assertThat((String) body, containsString("<body/>"));
- assertThat(headerMap.get("TikaContent-Type"), equalTo("application/msword"));
+ assertThat(headerMap.get(Exchange.CONTENT_TYPE), equalTo("application/msword"));
return true;
}
});
@@ -128,6 +186,7 @@ public class TikaParseTest extends CamelTestSupport {
from("direct:start2").to("tika:parse?tikaConfigUri=src/test/resources/tika-empty.xml")
.to("mock:result");
from("direct:start3").to("tika:parse?tikaConfig=#testConfig").to("mock:result");
+ from("direct:start4").to("tika:parse?tikaParseOutputEncoding=" + StandardCharsets.UTF_16.name()).to("mock:result");
}
};
}
http://git-wip-us.apache.org/repos/asf/camel/blob/c73068e7/components/camel-tika/src/test/resources/testOpenOffice2.odt
----------------------------------------------------------------------
diff --git a/components/camel-tika/src/test/resources/testOpenOffice2.odt b/components/camel-tika/src/test/resources/testOpenOffice2.odt
new file mode 100644
index 0000000..0b1bb11
Binary files /dev/null and b/components/camel-tika/src/test/resources/testOpenOffice2.odt differ