You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/10/04 19:53:48 UTC

[tika] 04/06: TIKA-3565 -- scrape xmp from jxl

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7e09f24e5c6dcdd3c0646d0ae026719a50fe1f4a
Author: tallison <ta...@apache.org>
AuthorDate: Mon Oct 4 15:18:52 2021 -0400

    TIKA-3565 -- scrape xmp from jxl
---
 .../org/apache/tika/parser/image/JXLParser.java    |  53 +++++++++++++++++++++
 .../services/org.apache.tika.parser.Parser         |   1 +
 .../java/org/apache/tika/parser/image/JXLTest.java |  37 ++++++++++++++
 .../resources/test-documents/testJXL_ISOBMFF.jxl   | Bin 0 -> 83814 bytes
 .../test/resources/test-documents/testJXL_exif.jxl | Bin 0 -> 2076158 bytes
 5 files changed, 91 insertions(+)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/JXLParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/JXLParser.java
new file mode 100644
index 0000000..cde232e
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/JXLParser.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.xmp.JempboxExtractor;
+
+/**
+ * Tries to scrape XMP out of JXL (the image/jxl media type) using {@link JempboxExtractor}.
+ */
+public class JXLParser extends AbstractParser {
+
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.image(
+            "jxl"));
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+                      ParseContext context) throws IOException, SAXException, TikaException {
+        JempboxExtractor jempboxExtractor = new JempboxExtractor(metadata);
+        jempboxExtractor.parse(stream); // metadata only: handler and context are not used here
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
index 4a48b36..6a6da53 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/resources/META-INF/services/org.apache.tika.parser.Parser
@@ -21,5 +21,6 @@ org.apache.tika.parser.image.WebPParser
 org.apache.tika.parser.image.JpegParser
 org.apache.tika.parser.image.HeifParser
 org.apache.tika.parser.image.ICNSParser
+org.apache.tika.parser.image.JXLParser
 
 
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JXLTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JXLTest.java
new file mode 100644
index 0000000..2fe74b5
--- /dev/null
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JXLTest.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import org.junit.jupiter.api.Test;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPMM;
+
+public class JXLTest extends TikaTest {
+    // Expects the title and the XMPMM document id to be populated from testJXL_ISOBMFF.jxl.
+    @Test
+    public void testBasicXMP() throws Exception {
+        Metadata metadata = getXML("testJXL_ISOBMFF.jxl").metadata;
+        assertEquals("Unknown Title", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("adobe:docid:photoshop:162ca2dc-6a89-9c46-8fcc-3a7f0e6deb18",
+                metadata.get(XMPMM.DOCUMENTID));
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/resources/test-documents/testJXL_ISOBMFF.jxl b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/resources/test-documents/testJXL_ISOBMFF.jxl
new file mode 100644
index 0000000..d67998e
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/resources/test-documents/testJXL_ISOBMFF.jxl differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/resources/test-documents/testJXL_exif.jxl b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/resources/test-documents/testJXL_exif.jxl
new file mode 100644
index 0000000..e34844f
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/resources/test-documents/testJXL_exif.jxl differ