You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@camel.apache.org by ji...@apache.org on 2023/10/05 14:00:09 UTC

[camel-quarkus] 14/45: Tika - added explanation and limitation regarding pdf, see #5234

This is an automated email from the ASF dual-hosted git repository.

jiriondrusek pushed a commit to branch camel-main
in repository https://gitbox.apache.org/repos/asf/camel-quarkus.git

commit ce95a6301ec4045f2e238f6aee12fa8af295bdce
Author: JiriOndrusek <on...@gmail.com>
AuthorDate: Thu Aug 31 14:28:31 2023 +0200

    Tika - added explanation and limitation regarding pdf, see #5234
---
 docs/modules/ROOT/pages/reference/extensions/tika.adoc                | 2 ++
 extensions/tika/runtime/src/main/doc/limitations.adoc                 | 2 ++
 integration-tests/tika/src/main/resources/application.properties      | 3 ++-
 .../java/org/apache/camel/quarkus/component/tika/it/TikaTest.java     | 4 ++--
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/docs/modules/ROOT/pages/reference/extensions/tika.adoc b/docs/modules/ROOT/pages/reference/extensions/tika.adoc
index bf5efdddc7..56712b2cda 100644
--- a/docs/modules/ROOT/pages/reference/extensions/tika.adoc
+++ b/docs/modules/ROOT/pages/reference/extensions/tika.adoc
@@ -54,6 +54,8 @@ can be changed only via `application.properties`.
 While you can use any of the available https://tika.apache.org/1.24.1/formats.html[Tika parsers] in JVM mode,
 only some of those are supported in native mode - see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide].
 
+PDF and ODF parsers cannot be used in either JVM mode or native mode. To consume PDF documents, use the Camel Quarkus PDF extension instead; this avoids a version conflict between Camel and the Quarkus Tika extension involving the PDFBox dependency.
+
 Use of the Tika parser without any configuration will initialize all available parsers. Unfortunately as some of them
 don't work in the native mode, the whole execution will fail.
 
diff --git a/extensions/tika/runtime/src/main/doc/limitations.adoc b/extensions/tika/runtime/src/main/doc/limitations.adoc
index 8edfbeddbc..1d0f7a75c4 100644
--- a/extensions/tika/runtime/src/main/doc/limitations.adoc
+++ b/extensions/tika/runtime/src/main/doc/limitations.adoc
@@ -4,6 +4,8 @@ can be changed only via `application.properties`.
 While you can use any of the available https://tika.apache.org/1.24.1/formats.html[Tika parsers] in JVM mode,
 only some of those are supported in native mode - see the https://quarkiverse.github.io/quarkiverse-docs/quarkus-tika/dev/index.html[Quarkus Tika guide].
 
+PDF and ODF parsers cannot be used in either JVM mode or native mode. To consume PDF documents, use the Camel Quarkus PDF extension instead; this avoids a version conflict between Camel and the Quarkus Tika extension involving the PDFBox dependency.
+
 Use of the Tika parser without any configuration will initialize all available parsers. Unfortunately as some of them
 don't work in the native mode, the whole execution will fail.
 
diff --git a/integration-tests/tika/src/main/resources/application.properties b/integration-tests/tika/src/main/resources/application.properties
index fb3468886b..536d32868c 100644
--- a/integration-tests/tika/src/main/resources/application.properties
+++ b/integration-tests/tika/src/main/resources/application.properties
@@ -15,7 +15,8 @@
 ## limitations under the License.
 ## ---------------------------------------------------------------------------
 
-quarkus.tika.parsers= pdf,odf,office,xml,image
+#quarkus.tika.parsers= pdf,odf,office,xml,image //Requires new release of quarkiverse-tika, which adopts tika with pdfBox 3.x
+quarkus.tika.parsers= odf,office,xml,image
 quarkus.tika.parser.office = org.apache.tika.parser.microsoft.OfficeParser
 quarkus.tika.parser.image = org.apache.tika.parser.image.ImageParser
 quarkus.tika.parser.xml = org.apache.tika.parser.xml.DcXMLParser
\ No newline at end of file
diff --git a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java
index 45fc59d695..adf61f13f1 100644
--- a/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java
+++ b/integration-tests/tika/src/test/java/org/apache/camel/quarkus/component/tika/it/TikaTest.java
@@ -34,13 +34,13 @@ import static org.hamcrest.Matchers.startsWith;
 @QuarkusTest
 class TikaTest {
 
-    @Disabled //https://github.com/apache/camel-quarkus/issues/5234
+    @Disabled //Requires new release of quarkiverse-tika, which adopts tika with pdfBox 3.x https://github.com/apache/camel-quarkus/issues/5234
     @Test
     public void testPdf() throws Exception {
         testParse("quarkus.pdf", "application/pdf", "Hello Quarkus");
     }
 
-    @Disabled //https://github.com/apache/camel-quarkus/issues/5234
+    @Disabled //Requires new release of quarkiverse-tika, which adopts tika with pdfBox 3.x https://github.com/apache/camel-quarkus/issues/5234
     @Test
     public void testOdf() throws Exception {
         testParse("testOpenOffice2.odt", "application/vnd.oasis.opendocument.text",