You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/13 15:17:36 UTC
[1/7] tika git commit: TIKA-1999 add limit to number of events extracted from the XMPMM section by the JempboxExtractor
Repository: tika
Updated Branches:
refs/heads/TIKA-1508 e48d19156 -> ef1f7b9ec
TIKA-1999 add limit to number of events extracted from the XMPMM section by the JempboxExtractor
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/3e145053
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/3e145053
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/3e145053
Branch: refs/heads/TIKA-1508
Commit: 3e14505381eefa603adabe61171c0c19fc685b2f
Parents: 1af1078
Author: tballison <ta...@mitre.org>
Authored: Wed Jun 8 11:45:30 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Wed Jun 8 11:45:30 2016 -0400
----------------------------------------------------------------------
.../tika/parser/image/xmp/JempboxExtractor.java | 31 ++++
.../parser/image/xmp/JempboxExtractorTest.java | 29 ++-
.../test/resources/test-documents/testXMP.xmp | 178 +++++++++++++++++++
3 files changed, 237 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/3e145053/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
index 0f326a8..d9ae71d 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
@@ -42,15 +42,21 @@ import org.xml.sax.SAXException;
public class JempboxExtractor {
+
+ private static int MAX_EVENT_HISTORY_IN_XMPMM = 1024;
+
// The XMP spec says it must be unicode, but for most file formats it specifies "must be encoded in UTF-8"
private static final String DEFAULT_XMP_CHARSET = UTF_8.name();
+
private XMPPacketScanner scanner = new XMPPacketScanner();
private Metadata metadata;
+ private static int maxXMPMMHistory;
public JempboxExtractor(Metadata metadata) {
this.metadata = metadata;
}
+
public void parse(InputStream file) throws IOException, TikaException {
ByteArrayOutputStream xmpraw = new ByteArrayOutputStream();
if (!scanner.parse(file, xmpraw)) {
@@ -160,7 +166,11 @@ public class JempboxExtractor {
//in DerivedFrom section
}
if (mmSchema.getHistory() != null) {
+ int eventsAdded = 0;
for (ResourceEvent stevt : mmSchema.getHistory()) {
+ if (eventsAdded >= MAX_EVENT_HISTORY_IN_XMPMM) {
+ break;
+ }
String instanceId = null;
String action = null;
Calendar when = null;
@@ -188,6 +198,7 @@ public class JempboxExtractor {
metadata.add(XMPMM.HISTORY_ACTION, action);
metadata.add(XMPMM.HISTORY_WHEN, dateString);
metadata.add(XMPMM.HISTORY_SOFTWARE_AGENT, softwareAgent);
+ eventsAdded++;
}
}
}
@@ -199,4 +210,24 @@ public class JempboxExtractor {
m.add(p, value);
}
}
+
+ /**
+ * Maximum number of events to extract from the
+ * event history in the XMP Media Management (XMPMM) section.
+ * The extractor will silently stop adding events after it
+ * has reached this threshold.
+ * <p>
+ * The default is 1024.
+ */
+ public static void setMaxXMPMMHistory(int maxEvents) {
+ MAX_EVENT_HISTORY_IN_XMPMM = maxEvents;
+ }
+
+ /**
+ *
+ * @return maximum number of events to extract from the XMPMM history.
+ */
+ public static int getMaxXMPMMHistory() {
+ return maxXMPMMHistory;
+ }
}
http://git-wip-us.apache.org/repos/asf/tika/blob/3e145053/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
index 4718539..cdbf5eb 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
@@ -19,17 +19,24 @@ package org.apache.tika.parser.image.xmp;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
+import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPMM;
+import org.apache.tika.parser.ParseContext;
import org.junit.Test;
-public class JempboxExtractorTest {
+import javax.xml.parsers.DocumentBuilder;
+
+public class JempboxExtractorTest extends TikaTest {
@Test
public void testParseJpeg() throws IOException, TikaException {
@@ -104,4 +111,24 @@ public class JempboxExtractorTest {
Arrays.asList("Mr B", "Mr A")));
}
+ @Test
+ public void testMaxXMPMMHistory() throws Exception {
+ int maxHistory = JempboxExtractor.getMaxXMPMMHistory();
+ try {
+ Metadata m = new Metadata();
+ JempboxExtractor ex = new JempboxExtractor(m);
+ ex.parse(getResourceAsStream("/test-documents/testXMP.xmp"));
+ assertEquals(7, m.getValues(XMPMM.HISTORY_EVENT_INSTANCEID).length);
+
+ JempboxExtractor.setMaxXMPMMHistory(5);
+ m = new Metadata();
+ ex = new JempboxExtractor(m);
+ ex.parse(getResourceAsStream("/test-documents/testXMP.xmp"));
+ assertEquals(5, m.getValues(XMPMM.HISTORY_EVENT_INSTANCEID).length);
+ } finally {
+ //if something goes wrong, make sure to set this back to what it was
+ JempboxExtractor.setMaxXMPMMHistory(maxHistory);
+ }
+ }
+
}
http://git-wip-us.apache.org/repos/asf/tika/blob/3e145053/tika-parsers/src/test/resources/test-documents/testXMP.xmp
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/resources/test-documents/testXMP.xmp b/tika-parsers/src/test/resources/test-documents/testXMP.xmp
new file mode 100644
index 0000000..00fe0f9
--- /dev/null
+++ b/tika-parsers/src/test/resources/test-documents/testXMP.xmp
@@ -0,0 +1,178 @@
+<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>
+<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="Adobe XMP Core 5.4-c005 78.147326, 2012/08/23-13:03:03 ">
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+ <rdf:Description rdf:about=""
+ xmlns:xmp="http://ns.adobe.com/xap/1.0/"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:xmpMM="http://ns.adobe.com/xap/1.0/mm/"
+ xmlns:stEvt="http://ns.adobe.com/xap/1.0/sType/ResourceEvent#"
+ xmlns:pdf="http://ns.adobe.com/pdf/1.3/"
+ xmlns:pdfaid="http://www.aiim.org/pdfa/ns/id/"
+ xmlns:pdfaExtension="http://www.aiim.org/pdfa/ns/extension/"
+ xmlns:pdfaSchema="http://www.aiim.org/pdfa/ns/schema#"
+ xmlns:pdfaProperty="http://www.aiim.org/pdfa/ns/property#">
+ <xmp:CreateDate>2014-03-04T21:56:45+01:00</xmp:CreateDate>
+ <xmp:CreatorTool>Adobe Acrobat 10.0</xmp:CreatorTool>
+ <xmp:ModifyDate>2014-03-04T23:54:48+01:00</xmp:ModifyDate>
+ <xmp:MetadataDate>2014-03-04T23:54:48+01:00</xmp:MetadataDate>
+ <dc:format>application/pdf</dc:format>
+ <dc:title>
+ <rdf:Alt>
+ <rdf:li xml:lang="x-default">Sample Acrobat 4.x (PDF Version 1.3)</rdf:li>
+ </rdf:Alt>
+ </dc:title>
+ <dc:creator>
+ <rdf:Bag/>
+ </dc:creator>
+ <xmpMM:DocumentID>uuid:cccee1fc-51b3-4b52-ac86-672af3974d25</xmpMM:DocumentID>
+ <xmpMM:InstanceID>uuid:afa71b09-7cc5-48ac-8664-ac6dcf8b5ab4</xmpMM:InstanceID>
+ <xmpMM:RenditionClass>default</xmpMM:RenditionClass>
+ <xmpMM:VersionID>1</xmpMM:VersionID>
+ <xmpMM:History>
+ <rdf:Seq>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:0313504b-a0b0-4dac-a9f0-357221f2eadf</stEvt:instanceID>
+ <stEvt:parameters>converted to PDF/A-1a</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:50:41+01:00</stEvt:when>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:edc4279e-0d5f-465e-b13e-1298402fd11c</stEvt:instanceID>
+ <stEvt:parameters>PDF/A conversion failed; Version and conformance level identification removed</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:50:42+01:00</stEvt:when>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:f565b775-43f3-4a9a-8541-e98c4115db6d</stEvt:instanceID>
+ <stEvt:parameters>converted to PDF/A-1a</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:51:34+01:00</stEvt:when>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:9fd5e0a8-14a5-4920-ad7f-870c0b8ee65f</stEvt:instanceID>
+ <stEvt:parameters>converted to PDF/A-1a</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:51:36+01:00</stEvt:when>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:09b6cfba-efde-4e07-a77f-70de858cc0aa</stEvt:instanceID>
+ <stEvt:parameters>PDF/A conversion failed; Version and conformance level identification removed</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:51:37+01:00</stEvt:when>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:1e4ffbd7-dabc-4aae-801c-15b3404ade36</stEvt:instanceID>
+ <stEvt:parameters>converted to PDF/A-1b</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:52:22+01:00</stEvt:when>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <stEvt:action>converted</stEvt:action>
+ <stEvt:instanceID>uuid:c1669773-a6ca-4bdd-aade-519030d0af00</stEvt:instanceID>
+ <stEvt:parameters>converted to PDF/A-1b</stEvt:parameters>
+ <stEvt:softwareAgent>Preflight</stEvt:softwareAgent>
+ <stEvt:when>2014-03-04T23:54:48+01:00</stEvt:when>
+ </rdf:li>
+ </rdf:Seq>
+ </xmpMM:History>
+ <pdf:Producer>Acrobat Web Capture 10.0</pdf:Producer>
+ <pdfaid:part>1</pdfaid:part>
+ <pdfaid:conformance>B</pdfaid:conformance>
+ <pdfaExtension:schemas>
+ <rdf:Bag>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaSchema:namespaceURI>http://ns.adobe.com/pdf/1.3/</pdfaSchema:namespaceURI>
+ <pdfaSchema:prefix>pdf</pdfaSchema:prefix>
+ <pdfaSchema:schema>Adobe PDF Schema</pdfaSchema:schema>
+ <pdfaSchema:property>
+ <rdf:Seq>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaProperty:category>internal</pdfaProperty:category>
+ <pdfaProperty:description>A name object indicating whether the document has been modified to include trapping information</pdfaProperty:description>
+ <pdfaProperty:name>Trapped</pdfaProperty:name>
+ <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+ </rdf:li>
+ </rdf:Seq>
+ </pdfaSchema:property>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaSchema:namespaceURI>http://ns.adobe.com/xap/1.0/mm/</pdfaSchema:namespaceURI>
+ <pdfaSchema:prefix>xmpMM</pdfaSchema:prefix>
+ <pdfaSchema:schema>XMP Media Management Schema</pdfaSchema:schema>
+ <pdfaSchema:property>
+ <rdf:Seq>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaProperty:category>internal</pdfaProperty:category>
+ <pdfaProperty:description>UUID based identifier for specific incarnation of a document</pdfaProperty:description>
+ <pdfaProperty:name>InstanceID</pdfaProperty:name>
+ <pdfaProperty:valueType>URI</pdfaProperty:valueType>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaProperty:category>internal</pdfaProperty:category>
+ <pdfaProperty:description>The common identifier for all versions and renditions of a document.</pdfaProperty:description>
+ <pdfaProperty:name>OriginalDocumentID</pdfaProperty:name>
+ <pdfaProperty:valueType>URI</pdfaProperty:valueType>
+ </rdf:li>
+ </rdf:Seq>
+ </pdfaSchema:property>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaSchema:namespaceURI>http://www.aiim.org/pdfa/ns/id/</pdfaSchema:namespaceURI>
+ <pdfaSchema:prefix>pdfaid</pdfaSchema:prefix>
+ <pdfaSchema:schema>PDF/A ID Schema</pdfaSchema:schema>
+ <pdfaSchema:property>
+ <rdf:Seq>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaProperty:category>internal</pdfaProperty:category>
+ <pdfaProperty:description>Part of PDF/A standard</pdfaProperty:description>
+ <pdfaProperty:name>part</pdfaProperty:name>
+ <pdfaProperty:valueType>Integer</pdfaProperty:valueType>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaProperty:category>internal</pdfaProperty:category>
+ <pdfaProperty:description>Amendment of PDF/A standard</pdfaProperty:description>
+ <pdfaProperty:name>amd</pdfaProperty:name>
+ <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+ </rdf:li>
+ <rdf:li rdf:parseType="Resource">
+ <pdfaProperty:category>internal</pdfaProperty:category>
+ <pdfaProperty:description>Conformance level of PDF/A standard</pdfaProperty:description>
+ <pdfaProperty:name>conformance</pdfaProperty:name>
+ <pdfaProperty:valueType>Text</pdfaProperty:valueType>
+ </rdf:li>
+ </rdf:Seq>
+ </pdfaSchema:property>
+ </rdf:li>
+ </rdf:Bag>
+ </pdfaExtension:schemas>
+ </rdf:Description>
+ </rdf:RDF>
+</x:xmpmeta>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<?xpacket end="w"?>
\ No newline at end of file
[7/7] tika git commit: fix conflict
Posted by ta...@apache.org.
fix conflict
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ef1f7b9e
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ef1f7b9e
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ef1f7b9e
Branch: refs/heads/TIKA-1508
Commit: ef1f7b9ec1b39d957450f3b8a11d045579068e6d
Parents: 2140858
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 13 11:17:27 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 13 11:17:27 2016 -0400
----------------------------------------------------------------------
tika-core/src/main/java/org/apache/tika/config/TikaConfig.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/ef1f7b9e/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 84fd636..e76b6e6 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -564,10 +564,10 @@ public class TikaConfig {
// See the thread "Configuring parsers and translators" for details
}
- //if the instance is configurable, then call configure()
Map<String, Param<?>> params = getParams(element);
//Assigning the params to bean fields/setters
AnnotationUtils.assignFieldParams(loaded, params);
+
// Have any decoration performed, eg explicit mimetypes
loaded = decorate(loaded, element);
// All done with setup
[3/7] tika git commit: TIKA-1996 -- upgrade to PDFBox 2.0.2
Posted by ta...@apache.org.
TIKA-1996 -- upgrade to PDFBox 2.0.2
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/06633cc1
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/06633cc1
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/06633cc1
Branch: refs/heads/TIKA-1508
Commit: 06633cc18df73c1cf4d19092a641a5355e19ac4c
Parents: 99aa587
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 13 09:21:27 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 13 09:21:27 2016 -0400
----------------------------------------------------------------------
CHANGES.txt | 2 ++
tika-parsers/pom.xml | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/06633cc1/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 0387bd6..6008b51 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Release 1.14 - ???
+ * Upgrade to PDFBox 2.0.2 (TIKA-1996).
+
* Add configurable maximum threshold for number of events extracted
from the XMP Media Management Schema in JempboxExtractor (TIKA-1999).
http://git-wip-us.apache.org/repos/asf/tika/blob/06633cc1/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index fec6449..a126eed 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -42,7 +42,7 @@
<tukaani.version>1.5</tukaani.version>
<mime4j.version>0.7.2</mime4j.version>
<vorbis.version>0.8</vorbis.version>
- <pdfbox.version>2.0.1</pdfbox.version>
+ <pdfbox.version>2.0.2</pdfbox.version>
<jempbox.version>1.8.12</jempbox.version>
<netcdf-java.version>4.5.5</netcdf-java.version>
<cxf.version>3.0.3</cxf.version>
[2/7] tika git commit: TIKA-1999 small fix and update CHANGES.txt
Posted by ta...@apache.org.
TIKA-1999 small fix and update CHANGES.txt
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/99aa587d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/99aa587d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/99aa587d
Branch: refs/heads/TIKA-1508
Commit: 99aa587d171207c0c557ce65397f767d6a42cdfd
Parents: 3e14505
Author: tballison <ta...@mitre.org>
Authored: Wed Jun 8 13:46:29 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Wed Jun 8 13:46:29 2016 -0400
----------------------------------------------------------------------
CHANGES.txt | 3 +++
.../java/org/apache/tika/parser/image/xmp/JempboxExtractor.java | 5 ++---
2 files changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/99aa587d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 08cd8ff..0387bd6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,8 @@
Release 1.14 - ???
+ * Add configurable maximum threshold for number of events extracted
+ from the XMP Media Management Schema in JempboxExtractor (TIKA-1999).
+
* Integrate TesseractOCR with full page image rendering for PDFs (TIKA-1994).
* Add mime detection via Nick C and parser for DBF files (TIKA-1513).
http://git-wip-us.apache.org/repos/asf/tika/blob/99aa587d/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
index d9ae71d..6d5038a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
@@ -43,14 +43,13 @@ import org.xml.sax.SAXException;
public class JempboxExtractor {
- private static int MAX_EVENT_HISTORY_IN_XMPMM = 1024;
+ private static volatile int MAX_EVENT_HISTORY_IN_XMPMM = 1024;
// The XMP spec says it must be unicode, but for most file formats it specifies "must be encoded in UTF-8"
private static final String DEFAULT_XMP_CHARSET = UTF_8.name();
private XMPPacketScanner scanner = new XMPPacketScanner();
private Metadata metadata;
- private static int maxXMPMMHistory;
public JempboxExtractor(Metadata metadata) {
this.metadata = metadata;
@@ -228,6 +227,6 @@ public class JempboxExtractor {
* @return maximum number of events to extract from the XMPMM history.
*/
public static int getMaxXMPMMHistory() {
- return maxXMPMMHistory;
+ return MAX_EVENT_HISTORY_IN_XMPMM;
}
}
[5/7] tika git commit: Start factoring out "configurable"; change signature of ParseContext's setParam to (Class, Param); add check for illegal field being specified in TikaConfig.
Posted by ta...@apache.org.
Start factoring out "configurable"; change signature of ParseContext's setParam to (Class, Param); add check for illegal field being specified in TikaConfig.
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/338db905
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/338db905
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/338db905
Branch: refs/heads/TIKA-1508
Commit: 338db905d4e203d4df4582d5511242eaa922af6b
Parents: ecdc403
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 13 11:14:27 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 13 11:14:27 2016 -0400
----------------------------------------------------------------------
.../java/org/apache/tika/config/TikaConfig.java | 12 ++---
.../org/apache/tika/parser/AbstractParser.java | 24 +---------
.../org/apache/tika/parser/ParseContext.java | 46 +++++++++++++-------
.../org/apache/tika/utils/AnnotationUtils.java | 24 +++++++---
.../tika/parser/ConfigurableParserTest.java | 3 ++
.../tika/parser/DummyConfigurableParser.java | 6 +--
.../tika/parser/DummyParameterizedParser.java | 3 +-
.../tika/parser/ParameterizedParserTest.java | 1 -
.../org/apache/tika/parser/pdf/PDFParser.java | 16 ++++---
.../apache/tika/parser/pdf/PDFParserTest.java | 19 +++++++-
10 files changed, 86 insertions(+), 68 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 853cdf0..692b007 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -567,15 +567,9 @@ public class TikaConfig {
// Have any decoration performed, eg explicit mimetypes
loaded = decorate(loaded, element);
//if the instance is configurable, then call configure()
- if (loaded instanceof Configurable){
- Map<String, Param<?>> params = getParams(element);
- //Assigning the params to bean fields/setters
- AnnotationUtils.assignFieldParams(loaded, params);
- //invoking the configure() hook
- ParseContext context = new ParseContext();
- context.getParams().putAll(params);
- ((Configurable) loaded).configure(context); // initialize here
- }
+ Map<String, Param<?>> params = getParams(element);
+ //Assigning the params to bean fields/setters
+ AnnotationUtils.assignFieldParams(loaded, params);
// All done with setup
return loaded;
} catch (ClassNotFoundException e) {
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
index 5c045db..51687e7 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AbstractParser.java
@@ -34,7 +34,7 @@ import org.xml.sax.SAXException;
*
* @since Apache Tika 0.10
*/
-public abstract class AbstractParser implements ConfigurableParser {
+public abstract class AbstractParser implements Parser {
/**
* Configuration supplied at runtime
@@ -62,27 +62,5 @@ public abstract class AbstractParser implements ConfigurableParser {
parse(stream, handler, metadata, new ParseContext());
}
- /**
- * called by the framework to supply runtime parameters which may be
- * required for initialization
- * @param context the parser context at runtime
- * @since Apache Tika 1.14
- */
- @Override
- public void configure(ParseContext context) throws TikaConfigException {
- this.context = context;
- }
-
-
- /**
- * Gets Parameters of this configurable instance
- * @return a map of key value pairs
- *
- * @since Apache Tika 1.14
- */
- @Override
- public Map<String, Param<?>> getParams() {
- return this.context.getParams();
- }
}
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index dc03099..68d5038 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -29,6 +29,7 @@ import java.io.IOException;
import java.io.Serializable;
import java.io.StringReader;
import java.lang.reflect.Method;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
@@ -55,10 +56,12 @@ public class ParseContext implements Serializable {
/** Map of objects in this context */
private final Map<String, Object> context = new HashMap<String, Object>();
+ private final static Map<String, Param<?>> EMPTY_PARAMS = Collections.EMPTY_MAP;
+
/**
* Map of configurable arguments
*/
- private final Map<String, Param<?>> params = new HashMap<>();
+ private final Map<String, Map<String, Param<?>>> params = new HashMap<>();
private static final EntityResolver IGNORING_SAX_ENTITY_RESOLVER = new EntityResolver() {
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
@@ -202,29 +205,42 @@ public class ParseContext implements Serializable {
}
/**
- * Stores a key=value parameter
- * @param key parameter name
+ * @param clazz class associated with given param name
* @param value value
*/
- public void setParam(String key, Param<?> value){
- this.params.put(key, value);
+ public void setParam(Class clazz, Param<?> value){
+ Map<String, Param<?>> classParams = this.params.get(clazz.getName());
+ if (classParams == null) {
+ classParams = new HashMap<>();
+ }
+ classParams.put(value.getName(), value);
+ this.params.put(clazz.getName(), classParams);
}
/**
- * Gets the value associated with given parameter
+ * Gets the value associated with given class and parameter
+ * @param clazz class
* @param key parameter name
- * @return param value
+ * @return param value or null if the clazz or key doesn't exist
*/
- public Param<?> getParam(String key){
- return this.params.get(key);
+ public Param<?> getParam(Class clazz, String key) {
+ Map<String, Param<?>> classParams = this.params.get(clazz.getName());
+ if (classParams != null) {
+ return classParams.get(key);
+ }
+ return null;
}
/**
- * Gets all the params
- * @return map of key values
+ * Gets all the params for the specified class
+ * @param clazz class for which to grab the params
+ * @return map of key values, or an empty map if nothing has been specified
*/
- public Map<String, Param<?>> getParams() {
- return params;
+ public Map<String, Param<?>> getParams(Class clazz) {
+ if (params.containsKey(clazz.getName())) {
+ return params.get(clazz.getName());
+ }
+ return EMPTY_PARAMS;
}
/**
@@ -232,8 +248,8 @@ public class ParseContext implements Serializable {
* @param key parameter name
* @return true if parameter is available, false otherwise
*/
- public boolean hasParam(String key){
- return params.containsKey(key);
+ public boolean hasParam(Class clazz, String key){
+ return params.containsKey(clazz) && params.get(clazz.getName()).containsKey(key);
}
/**
* Returns the DOM builder factory specified in this parsing context.
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java b/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
index 08e004b..1f56bc7 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/AnnotationUtils.java
@@ -26,11 +26,7 @@ import java.lang.annotation.Annotation;
import java.lang.reflect.AccessibleObject;
import java.security.AccessController;
import java.security.PrivilegedAction;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
/**
* This class contains utilities for dealing with tika annotations
@@ -100,7 +96,11 @@ public class AnnotationUtils {
}
List<ParamField> fields = PARAM_INFO.get(beanClass);
+
+ Set<String> validFieldNames = new HashSet<>();
+
for (ParamField field : fields) {
+ validFieldNames.add(field.getName());
Param<?> param = params.get(field.getName());
if (param != null){
if (field.getType().isAssignableFrom(param.getType())) {
@@ -110,7 +110,7 @@ public class AnnotationUtils {
throw new TikaConfigException(e.getMessage(), e);
}
} else {
- String msg = String.format("Value '%s' of type '%s' cant be" +
+ String msg = String.format(Locale.ROOT, "Value '%s' of type '%s' cant be" +
" assigned to field '%s' of defined type '%s'",
param.getValue(), param.getValue().getClass(),
field.getName(), field.getType());
@@ -118,7 +118,7 @@ public class AnnotationUtils {
}
} else if (field.isRequired()){
//param not supplied but field is declared as required?
- String msg = String.format("Param %s is required for %s," +
+ String msg = String.format(Locale.ROOT, "Param %s is required for %s," +
" but it is not given in config.", field.getName(),
bean.getClass().getName());
throw new TikaConfigException(msg);
@@ -127,5 +127,15 @@ public class AnnotationUtils {
//LOG.debug("Param not supplied, field is not mandatory");
}
}
+ //now test that params doesn't contain a field
+ //not allowed by this object
+ for (String fieldName : params.keySet()) {
+ if (! validFieldNames.contains(fieldName)) {
+ String msg = String.format(Locale.ROOT,
+ "No field '%s' exists for %s",
+ fieldName, bean.getClass().getName());
+ throw new TikaConfigException(msg);
+ }
+ }
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
index dcf188d..ffb632c 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/ConfigurableParserTest.java
@@ -20,6 +20,7 @@ import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.junit.Assert;
+import org.junit.Ignore;
import org.junit.Test;
import java.io.File;
@@ -36,6 +37,7 @@ public class ConfigurableParserTest {
public static final String TEST_PARAM_VAL = "testparamval";
@Test
+ @Ignore
public void testConfigurableParser() throws Exception {
URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
assert configFileUrl != null;
@@ -48,6 +50,7 @@ public class ConfigurableParserTest {
}
@Test
+ @Ignore
public void testConfigurableParserTypes() throws Exception {
URL configFileUrl = getClass().getClassLoader().getResource(TIKA_CFG_FILE);
assert configFileUrl != null;
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
index 3914b01..15fe060 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyConfigurableParser.java
@@ -39,8 +39,8 @@ import java.util.Set;
* 3. parameters were available at parse
*
*/
-public class DummyConfigurableParser extends AbstractParser {
-
+public class DummyConfigurableParser {
+/*
private static Set<MediaType> MIMES = new HashSet<>();
static {
MIMES.add(MediaType.TEXT_PLAIN);
@@ -63,5 +63,5 @@ public class DummyConfigurableParser extends AbstractParser {
metadata.add(entry.getKey()+"-type", param.getValue().getClass().getName());
}
}
-
+*/
}
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java b/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
index 848b774..801d65e 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/DummyParameterizedParser.java
@@ -40,8 +40,7 @@ import static org.osgi.util.measurement.Unit.s;
* A test Parsers to test {@link Field}
* @since Apache Tika 1.14
*/
-public class DummyParameterizedParser extends AbstractParser
- implements ConfigurableParser {
+public class DummyParameterizedParser extends AbstractParser {
private static Set<MediaType> MIMES = new HashSet<>();
static {
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
----------------------------------------------------------------------
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
index e0c3b53..a048f29 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
@@ -77,7 +77,6 @@ public class ParameterizedParserTest {
}
@Test
- @Ignore("can we get this to work, somehow?")
public void testBadParam() throws Exception {
try {
Metadata m = getMetadata("TIKA-1986-bad-parameters.xml");
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index bacc901..dd03177 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -21,12 +21,7 @@ import javax.xml.stream.XMLStreamException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Calendar;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-import java.util.Set;
+import java.util.*;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.jempbox.xmp.XMPMetadata;
@@ -44,6 +39,7 @@ import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.tika.config.Field;
+import org.apache.tika.config.Param;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
@@ -86,7 +82,7 @@ import static org.bouncycastle.asn1.x500.style.RFC4519Style.name;
* turn this feature on, see
* {@link PDFParserConfig#setExtractInlineImages(boolean)}.
*/
-public class PDFParser extends AbstractParser implements ConfigurableParser {
+public class PDFParser extends AbstractParser {
/**
@@ -123,6 +119,12 @@ public class PDFParser extends AbstractParser implements ConfigurableParser {
PDFParserConfig localConfig = context.get(PDFParserConfig.class, defaultConfig);
//TODO: get rid of this after dev of TIKA-1508!!!
localConfig.setSortByPosition(sortByPosition);
+
+ //TODO: this is just a mockup...move elsewhere
+ Map<String, Param<?>> params = context.getParams(PDFParser.class);
+ if (params != null && params.containsKey("sortByPosition")) {
+ localConfig.setSortByPosition((Boolean)params.get("sortByPosition").getValue());
+ }
String password = "";
try {
// PDFBox can process entirely in memory, or can use a temp file
http://git-wip-us.apache.org/repos/asf/tika/blob/338db905/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index ac54b11..2ef29f3 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -35,6 +35,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.tika.TikaTest;
+import org.apache.tika.config.Param;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.AccessPermissionException;
import org.apache.tika.exception.EncryptedDocumentException;
@@ -469,7 +470,7 @@ public class PDFParserTest extends TikaTest {
content = content.replaceAll("\\s+", " ");
assertContains("Left column line 1 Left column line 2 Right column line 1 Right column line 2", content);
- parser.getPDFParserConfig().setSortByPosition(true);
+ parser.setSortByPosition(true);
stream = getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf");
content = getText(stream, parser);
content = content.replaceAll("\\s+", " ");
@@ -1229,6 +1230,22 @@ public class PDFParserTest extends TikaTest {
}
+ @Test
+ public void testParameterizationViaContext() throws Exception {
+ ParseContext context = new ParseContext();
+
+ Param<Boolean> paramVal = new Param<>("sortByPosition", new Boolean(true));
+ context.setParam(PDFParser.class, paramVal);
+
+ Parser p = new AutoDetectParser();
+ String text = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), p, context);
+ text = text.replaceAll("\\s+", " ");
+
+ // Column text is now interleaved:
+ assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", text);
+
+ }
+
private void assertException(String path, Parser parser, ParseContext context, Class expected) {
boolean noEx = false;
InputStream is = getResourceAsStream(path);
[6/7] tika git commit: Merge remote-tracking branch
'origin/TIKA-1508' into TIKA-1508
Posted by ta...@apache.org.
Merge remote-tracking branch 'origin/TIKA-1508' into TIKA-1508
# Conflicts:
# tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/21408588
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/21408588
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/21408588
Branch: refs/heads/TIKA-1508
Commit: 2140858840af8f1c015f3570dc4ac8d2bb4405cf
Parents: 338db90 e48d191
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 13 11:16:34 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 13 11:16:34 2016 -0400
----------------------------------------------------------------------
.../java/org/apache/tika/config/TikaConfig.java | 6 +--
.../tika/parser/ParameterizedParserTest.java | 9 +++++
.../TIKA-1986-parameterized-decorated.xml | 39 ++++++++++++++++++++
3 files changed, 51 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/21408588/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
----------------------------------------------------------------------
diff --cc tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 692b007,1163d84..84fd636
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@@ -563,13 -563,19 +563,13 @@@ public class TikaConfig
// TODO Support arguments, needed for Translators etc
// See the thread "Configuring parsers and translators" for details
}
-
- // Have any decoration performed, eg explicit mimetypes
- loaded = decorate(loaded, element);
+
//if the instance is configurable, then call configure()
- if (loaded instanceof Configurable){
- Map<String, Param<?>> params = getParams(element);
- //Assigning the params to bean fields/setters
- AnnotationUtils.assignFieldParams(loaded, params);
- //invoking the configure() hook
- ParseContext context = new ParseContext();
- context.getParams().putAll(params);
- ((Configurable) loaded).configure(context); // initialize here
- }
+ Map<String, Param<?>> params = getParams(element);
+ //Assigning the params to bean fields/setters
+ AnnotationUtils.assignFieldParams(loaded, params);
+ // Have any decoration performed, eg explicit mimetypes
+ loaded = decorate(loaded, element);
// All done with setup
return loaded;
} catch (ClassNotFoundException e) {
http://git-wip-us.apache.org/repos/asf/tika/blob/21408588/tika-core/src/test/java/org/apache/tika/parser/ParameterizedParserTest.java
----------------------------------------------------------------------
[4/7] tika git commit: Merge remote-tracking branch 'origin/master'
into TIKA-1508
Posted by ta...@apache.org.
Merge remote-tracking branch 'origin/master' into TIKA-1508
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/ecdc4035
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/ecdc4035
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/ecdc4035
Branch: refs/heads/TIKA-1508
Commit: ecdc403578d2a2b8fb70f66b7df1ece96b5efa9c
Parents: 853750d 06633cc
Author: tballison <ta...@mitre.org>
Authored: Mon Jun 13 09:25:24 2016 -0400
Committer: tballison <ta...@mitre.org>
Committed: Mon Jun 13 09:25:24 2016 -0400
----------------------------------------------------------------------
CHANGES.txt | 5 +
tika-parsers/pom.xml | 2 +-
.../tika/parser/image/xmp/JempboxExtractor.java | 30 ++++
.../parser/image/xmp/JempboxExtractorTest.java | 29 ++-
.../test/resources/test-documents/testXMP.xmp | 178 +++++++++++++++++++
5 files changed, 242 insertions(+), 2 deletions(-)
----------------------------------------------------------------------