You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/01/26 18:41:51 UTC
[tika] branch master updated: TIKA-1974 -- remove deprecated
metadata properties/keys for Tika 2.0
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 10a8eec TIKA-1974 -- remove deprecated metadata properties/keys for Tika 2.0
new e6e3b88 Merge branch 'TIKA-1974'
10a8eec is described below
commit 10a8eec119c7a77be76000b30aaffb96a552cc44
Author: tballison <ta...@mitre.org>
AuthorDate: Fri Jan 26 13:26:27 2018 -0500
TIKA-1974 -- remove deprecated metadata properties/keys for Tika 2.0
---
CHANGES.txt | 2 +
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 5 +-
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 5 +-
.../test/java/org/apache/tika/cli/TikaCLITest.java | 18 +--
.../apache/tika/batch/fs/FSDocumentSelector.java | 5 +-
.../org/apache/tika/batch/fs/FSFileResource.java | 5 +-
.../RecursiveParserWrapperFSConsumerTest.java | 4 +-
.../test/java/org/apache/tika/bundle/BundleIT.java | 5 +-
tika-core/pom.xml | 1 +
tika-core/src/main/java/org/apache/tika/Tika.java | 3 +-
.../java/org/apache/tika/detect/NameDetector.java | 3 +-
.../tika/extractor/ParserContainerExtractor.java | 3 +-
.../ParsingEmbeddedDocumentExtractor.java | 5 +-
.../java/org/apache/tika/io/FilenameUtils.java | 2 +-
.../java/org/apache/tika/io/TikaInputStream.java | 7 +-
.../apache/tika/metadata/AccessPermissions.java | 2 +-
.../java/org/apache/tika/metadata/Database.java | 2 +-
.../java/org/apache/tika/metadata/DublinCore.java | 34 ++---
.../main/java/org/apache/tika/metadata/HTML.java | 2 +-
.../main/java/org/apache/tika/metadata/IPTC.java | 168 ++++++++++-----------
.../java/org/apache/tika/metadata/MSOffice.java | 128 ----------------
.../java/org/apache/tika/metadata/Message.java | 4 +-
.../java/org/apache/tika/metadata/Metadata.java | 72 +++------
.../main/java/org/apache/tika/metadata/Office.java | 55 +++----
.../apache/tika/metadata/OfficeOpenXMLCore.java | 21 +--
.../tika/metadata/OfficeOpenXMLExtended.java | 22 +--
.../main/java/org/apache/tika/metadata/PDF.java | 10 +-
.../java/org/apache/tika/metadata/Photoshop.java | 30 ++--
.../java/org/apache/tika/metadata/QuattroPro.java | 8 +-
.../java/org/apache/tika/metadata/RTFMetadata.java | 10 +-
.../apache/tika/metadata/TikaCoreProperties.java | 156 +++++++------------
.../org/apache/tika/metadata/TikaMetadataKeys.java | 34 -----
.../java/org/apache/tika/metadata/WordPerfect.java | 14 +-
.../main/java/org/apache/tika/metadata/XMP.java | 2 +-
.../main/java/org/apache/tika/mime/MimeTypes.java | 3 +-
.../mime/ProbabilisticMimeDetectionSelector.java | 3 +-
.../java/org/apache/tika/parser/ParsingReader.java | 5 +-
.../apache/tika/parser/RecursiveParserWrapper.java | 9 +-
.../tika/parser/digest/InputStreamDigester.java | 2 +-
.../src/test/java/org/apache/tika/TikaTest.java | 3 +-
.../tika/detect/MimeDetectionWithNNTest.java | 2 +-
.../org/apache/tika/detect/NameDetectorTest.java | 3 +-
.../org/apache/tika/io/TikaInputStreamTest.java | 3 +-
.../org/apache/tika/metadata/TestMetadata.java | 3 +-
.../org/apache/tika/mime/MimeDetectionTest.java | 11 +-
.../org/apache/tika/mime/MimeTypesReaderTest.java | 7 +-
.../tika/mime/ProbabilisticMimeDetectionTest.java | 11 +-
.../ProbabilisticMimeDetectionTestWithTika.java | 11 +-
.../org/apache/tika/parser/mock/MockParser.java | 4 +-
.../java/org/apache/tika/eval/ExtractComparer.java | 2 +-
.../org/apache/tika/eval/io/ExtractReader.java | 3 +-
.../apache/tika/example/ExtractEmbeddedFiles.java | 3 +-
.../org/apache/tika/example/ImportContextImpl.java | 3 +-
.../tika/example/MetadataAwareLuceneIndexer.java | 17 ++-
.../java/org/apache/tika/example/MyFirstTika.java | 3 +-
.../org/apache/tika/example/ParsingExample.java | 3 +-
.../java/org/apache/tika/example/RecentFiles.java | 13 +-
.../apache/tika/example/TrecDocumentGenerator.java | 5 +-
.../apache/tika/parser/asm/XHTMLClassVisitor.java | 2 +-
.../apache/tika/parser/code/SourceCodeParser.java | 2 +-
.../java/org/apache/tika/parser/dwg/DWGParser.java | 8 +-
.../tika/parser/executable/ExecutableParser.java | 3 +-
.../tika/parser/font/AdobeFontMetricParser.java | 2 +-
.../java/org/apache/tika/parser/hdf/HDFParser.java | 3 +-
.../org/apache/tika/parser/html/HtmlHandler.java | 4 +-
.../tika/parser/image/ImageMetadataExtractor.java | 3 +-
.../tika/parser/image/xmp/JempboxExtractor.java | 5 +-
.../apache/tika/parser/iptc/IptcAnpaParser.java | 2 +-
.../tika/parser/iwork/KeynoteContentHandler.java | 3 +-
.../tika/parser/iwork/NumbersContentHandler.java | 3 +-
.../tika/parser/iwork/PagesContentHandler.java | 6 +-
.../apache/tika/parser/jdbc/JDBCTableReader.java | 6 +-
.../tika/parser/mail/MailContentHandler.java | 6 +-
.../org/apache/tika/parser/mbox/MboxParser.java | 2 +-
.../apache/tika/parser/mbox/OutlookPSTParser.java | 8 +-
.../parser/microsoft/AbstractPOIFSExtractor.java | 16 +-
.../tika/parser/microsoft/JackcessParser.java | 7 +-
.../apache/tika/parser/microsoft/OfficeParser.java | 2 +-
.../tika/parser/microsoft/OutlookExtractor.java | 2 +-
.../tika/parser/microsoft/SummaryExtractor.java | 34 ++---
.../apache/tika/parser/microsoft/TNEFParser.java | 6 +-
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 12 +-
.../parser/microsoft/ooxml/MetadataExtractor.java | 114 ++++++--------
.../ooxml/XSSFBExcelExtractorDecorator.java | 3 +-
.../ooxml/XSSFExcelExtractorDecorator.java | 5 +-
.../tika/parser/microsoft/xml/WordMLParser.java | 2 +-
.../java/org/apache/tika/parser/mp4/MP4Parser.java | 2 +-
.../tika/parser/odf/OpenDocumentMetaParser.java | 60 ++++----
.../apache/tika/parser/odf/OpenDocumentParser.java | 3 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 2 +-
.../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 2 +-
.../java/org/apache/tika/parser/pdf/PDFParser.java | 15 +-
.../apache/tika/parser/pkg/CompressorParser.java | 5 +-
.../org/apache/tika/parser/pkg/PackageParser.java | 4 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 2 +-
.../apache/tika/parser/rtf/RTFEmbObjHandler.java | 10 +-
.../apache/tika/parser/rtf/RTFObjDataParser.java | 6 +-
.../org/apache/tika/parser/rtf/TextExtractor.java | 5 +-
.../apache/tika/parser/utils/CommonsDigester.java | 2 +-
.../org/apache/tika/parser/xml/DcXMLParser.java | 2 +-
.../apache/tika/parser/xml/FictionBookParser.java | 4 +-
.../tika/detect/TestContainerAwareDetector.java | 7 +-
.../java/org/apache/tika/mime/TestMimeTypes.java | 7 +-
.../apache/tika/parser/AutoDetectParserTest.java | 8 +-
.../parser/BouncyCastleDigestingParserTest.java | 2 +-
.../apache/tika/parser/DigestingParserTest.java | 2 +-
.../tika/parser/RecursiveParserWrapperTest.java | 18 +--
.../apache/tika/parser/asm/ClassParserTest.java | 2 +-
.../tika/parser/code/SourceCodeParserTest.java | 2 +-
.../org/apache/tika/parser/dwg/DWGParserTest.java | 22 +--
.../parser/executable/ExecutableParserTest.java | 3 +-
.../apache/tika/parser/font/FontParsersTest.java | 3 +-
.../apache/tika/parser/image/BPGParserTest.java | 3 +-
.../parser/image/ImageMetadataExtractorTest.java | 4 +-
.../tika/parser/image/MetadataFieldsTest.java | 2 +-
.../apache/tika/parser/image/TiffParserTest.java | 5 +-
.../parser/image/xmp/JempboxExtractorTest.java | 19 +--
.../apache/tika/parser/iwork/IWorkParserTest.java | 22 +--
.../apache/tika/parser/jdbc/SQLite3ParserTest.java | 13 +-
.../apache/tika/parser/jpeg/JpegParserTest.java | 19 +--
.../apache/tika/parser/mail/RFC822ParserTest.java | 13 +-
.../apache/tika/parser/mbox/MboxParserTest.java | 6 +-
.../tika/parser/microsoft/ExcelParserTest.java | 8 +-
.../tika/parser/microsoft/OldExcelParserTest.java | 2 +-
.../tika/parser/microsoft/OutlookParserTest.java | 3 -
.../microsoft/POIContainerExtractionTest.java | 8 +-
.../parser/microsoft/PowerPointParserTest.java | 21 +--
.../tika/parser/microsoft/ProjectParserTest.java | 5 +-
.../tika/parser/microsoft/PublisherParserTest.java | 1 -
.../tika/parser/microsoft/TNEFParserTest.java | 2 +-
.../tika/parser/microsoft/WordParserTest.java | 16 +-
.../parser/microsoft/ooxml/OOXMLParserTest.java | 60 +++-----
.../parser/microsoft/ooxml/SXSLFExtractorTest.java | 15 +-
.../parser/microsoft/ooxml/SXWPFExtractorTest.java | 19 +--
.../parser/microsoft/xml/XML2003ParserTest.java | 2 +-
.../org/apache/tika/parser/mp3/Mp3ParserTest.java | 8 -
.../org/apache/tika/parser/mp4/MP4ParserTest.java | 3 -
.../tika/parser/ocr/TesseractOCRParserTest.java | 3 +-
.../org/apache/tika/parser/odf/ODFParserTest.java | 81 +++-------
.../org/apache/tika/parser/pdf/PDFParserTest.java | 51 +++----
.../apache/tika/parser/pkg/AbstractPkgTest.java | 2 +-
.../org/apache/tika/parser/pkg/ZipParserTest.java | 2 +-
.../org/apache/tika/parser/prt/PRTParserTest.java | 7 +-
.../org/apache/tika/parser/rtf/RTFParserTest.java | 19 +--
.../parser/solidworks/SolidworksParserTest.java | 24 +--
.../apache/tika/parser/xml/DcXMLParserTest.java | 21 +--
.../EmptyAndDuplicateElementsXMLParserTest.java | 5 +-
.../src/test/resources/test-documents/testPST.pst | Bin 271360 -> 271360 bytes
.../test-documents/testPST_variousBodyTypes.pst | Bin 271360 -> 271360 bytes
.../tika/server/resource/DetectorResource.java | 3 +-
.../apache/tika/server/resource/TikaResource.java | 4 +-
.../tika/server/resource/UnpackerResource.java | 4 +-
.../apache/tika/server/MetadataResourceTest.java | 25 +--
.../tika/server/RecursiveMetadataResourceTest.java | 8 +-
.../main/java/org/apache/tika/xmp/XMPMetadata.java | 3 +-
.../apache/tika/xmp/convert/GenericConverter.java | 3 +-
.../tika/xmp/convert/MSOfficeBinaryConverter.java | 2 +-
.../tika/xmp/convert/MSOfficeXMLConverter.java | 2 +-
.../tika/xmp/convert/OpenDocumentConverter.java | 7 +-
.../org/apache/tika/xmp/convert/RTFConverter.java | 2 +-
.../java/org/apache/tika/xmp/TikaToXMPTest.java | 4 +-
.../java/org/apache/tika/xmp/XMPMetadataTest.java | 16 +-
162 files changed, 823 insertions(+), 1204 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 3dea163..709fcc2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
Release 2.0.0 - ???
+ * Remove deprecated Metadata keys/properties (TIKA-1974).
+
* Require Java 8 (TIKA-2553).
* Add a parser for XPS (TIKA-2524).
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 7d87ec9..472b0aa 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -80,6 +80,7 @@ import org.apache.tika.gui.TikaGUI;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.language.detect.LanguageHandler;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadata;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.mime.MediaType;
@@ -1036,7 +1037,7 @@ public class TikaCLI {
}
public void parseEmbedded(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, boolean outputHtml) throws SAXException, IOException {
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name == null) {
name = "file" + count++;
@@ -1053,7 +1054,7 @@ public class TikaCLI {
}
}
- String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
+ String relID = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
if (relID != null && !name.startsWith(relID)) {
name = relID + "_" + name;
}
diff --git a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
index 5ecc763..bfec921 100644
--- a/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
+++ b/tika-app/src/main/java/org/apache/tika/gui/TikaGUI.java
@@ -67,6 +67,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
@@ -367,7 +368,7 @@ public class TikaGUI extends JFrame
}
}
- String name = md.get(Metadata.RESOURCE_NAME_KEY);
+ String name = md.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null && name.length() > 0) {
setTitle("Apache Tika: " + name);
} else {
@@ -653,7 +654,7 @@ public class TikaGUI extends JFrame
public void parse(InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context) throws IOException,
SAXException, TikaException {
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if(name != null && wanted.containsKey(name)) {
FileOutputStream out = new FileOutputStream(wanted.get(name));
IOUtils.copy(stream, out);
diff --git a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
index 85f14b3..5c6d359 100644
--- a/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
+++ b/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
@@ -159,9 +159,9 @@ public class TikaCLITest {
//test legacy alphabetic sort of keys
int enc = json.indexOf("\"Content-Encoding\"");
int fb = json.indexOf("fb:admins");
- int title = json.indexOf("\"title\"");
+ int title = json.indexOf("\"dc:title\"");
assertTrue(enc > -1 && fb > -1 && enc < fb);
- assertTrue (fb > -1 && title > -1 && fb < title);
+ assertTrue (fb > -1 && title > -1 && fb > title);
assertTrue(json.contains("\"X-TIKA:digest:MD2\":"));
}
@@ -183,9 +183,9 @@ public class TikaCLITest {
//test legacy alphabetic sort of keys
int enc = json.indexOf("\"Content-Encoding\"");
int fb = json.indexOf("fb:admins");
- int title = json.indexOf("\"title\"");
+ int title = json.indexOf("\"dc:title\"");
assertTrue(enc > -1 && fb > -1 && enc < fb);
- assertTrue (fb > -1 && title > -1 && fb < title);
+ assertTrue (fb > -1 && title > -1 && fb > title);
}
/**
@@ -398,12 +398,10 @@ public class TikaCLITest {
String[] params = new String[]{"-m", "-J", "-r", resourcePrefix+"test_recursive_embedded.docx"};
TikaCLI.main(params);
String content = outContent.toString(UTF_8.name());
- assertTrue(content.contains("[\n" +
- " {\n" +
- " \"Application-Name\": \"Microsoft Office Word\",\n" +
- " \"Application-Version\": \"15.0000\",\n" +
- " \"Character Count\": \"28\",\n" +
- " \"Character-Count-With-Spaces\": \"31\","));
+ assertTrue(content.contains(
+ "\"extended-properties:AppVersion\": \"15.0000\","));
+ assertTrue(content.contains(
+ "\"extended-properties:Application\": \"Microsoft Office Word\","));
assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"/embed1.zip\""));
assertFalse(content.contains("X-TIKA:content"));
}
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDocumentSelector.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDocumentSelector.java
index 5db1a2d..baadbc8 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDocumentSelector.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/FSDocumentSelector.java
@@ -22,11 +22,12 @@ import java.util.regex.Pattern;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.util.PropsUtil;
/**
* Selector that chooses files based on their file name
- * and their size, as determined by Metadata.RESOURCE_NAME_KEY and Metadata.CONTENT_LENGTH.
+ * and their size, as determined by TikaCoreProperties.RESOURCE_NAME_KEY and Metadata.CONTENT_LENGTH.
* <p/>
* The {@link #excludeFileName} pattern is applied first (if it isn't null).
* Then the {@link #includeFileName} pattern is applied (if it isn't null),
@@ -52,7 +53,7 @@ public class FSDocumentSelector implements DocumentSelector {
@Override
public boolean select(Metadata metadata) {
- String fName = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String fName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
long sz = PropsUtil.getLong(metadata.get(Metadata.CONTENT_LENGTH), -1L);
if (maxFileSizeBytes > -1 && sz > 0) {
if (sz > maxFileSizeBytes) {
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
index 327ba1b..7cf06d4 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/FSFileResource.java
@@ -28,13 +28,14 @@ import java.util.Locale;
import org.apache.tika.batch.FileResource;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* FileSystem(FS)Resource wraps a file name.
* <p/>
* This class automatically sets the following keys in Metadata:
* <ul>
- * <li>Metadata.RESOURCE_NAME_KEY (file name)</li>
+ * <li>TikaCoreProperties.RESOURCE_NAME_KEY (file name)</li>
* <li>Metadata.CONTENT_LENGTH</li>
* <li>FSProperties.FS_REL_PATH</li>
* <li>FileResource.FILE_EXTENSION</li>
@@ -76,7 +77,7 @@ public class FSFileResource implements FileResource {
//need to set these now so that the filter can determine
//whether or not to crawl this file
- metadata.set(Metadata.RESOURCE_NAME_KEY, fullPath.getFileName().toString());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fullPath.getFileName().toString());
long sz = -1;
try {
sz = Files.size(fullPath);
diff --git a/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java b/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
index dd8e7a1..ee5b955 100644
--- a/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
+++ b/tika-batch/src/test/java/org/apache/tika/batch/RecursiveParserWrapperFSConsumerTest.java
@@ -46,7 +46,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
public void testEmbeddedWithNPE() throws Exception {
final String path = "/test-documents/embedded_with_npe.xml";
final Metadata metadata = new Metadata();
- metadata.add(Metadata.RESOURCE_NAME_KEY, "embedded_with_npe.xml");
+ metadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, "embedded_with_npe.xml");
ArrayBlockingQueue<FileResource> queue = new ArrayBlockingQueue<FileResource>(2);
queue.add(new FileResource() {
@@ -93,7 +93,7 @@ public class RecursiveParserWrapperFSConsumerTest extends TikaTest {
public void testEmbeddedThenNPE() throws Exception {
final String path = "/test-documents/embedded_then_npe.xml";
final Metadata metadata = new Metadata();
- metadata.add(Metadata.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
+ metadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
ArrayBlockingQueue<FileResource> queue = new ArrayBlockingQueue<FileResource>(2);
queue.add(new FileResource() {
diff --git a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
index 52eeddd..f568311 100644
--- a/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
+++ b/tika-bundle/src/test/java/org/apache/tika/bundle/BundleIT.java
@@ -46,6 +46,7 @@ import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.fork.ForkParser;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.DefaultParser;
@@ -130,10 +131,10 @@ public class BundleIT {
@Test
public void testBundleDetection() throws Exception {
Metadata metadataTXT = new Metadata();
- metadataTXT.set(Metadata.RESOURCE_NAME_KEY, "test.txt");
+ metadataTXT.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt");
Metadata metadataPDF = new Metadata();
- metadataPDF.set(Metadata.RESOURCE_NAME_KEY, "test.pdf");
+ metadataPDF.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.pdf");
// Simple type detection
assertEquals(MediaType.TEXT_PLAIN, contentTypeDetector.detect(null, metadataTXT));
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index d3e4f4a..70219d1 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -125,6 +125,7 @@
<exlude>org/apache/tika/metadata/Metadata</exlude>
<exlude>org/apache/tika/metadata/MSOffice</exlude>
<exlude>org/apache/tika/parser/EmptyParser</exlude>
+ <exclude>org/apache/tika/metadata/TikaMetadataKeys</exclude>
</excludes>
<comparisonArtifacts>
<comparisonArtifact>
diff --git a/tika-core/src/main/java/org/apache/tika/Tika.java b/tika-core/src/main/java/org/apache/tika/Tika.java
index c0cf281..f8c4cd6 100644
--- a/tika-core/src/main/java/org/apache/tika/Tika.java
+++ b/tika-core/src/main/java/org/apache/tika/Tika.java
@@ -32,6 +32,7 @@ import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
@@ -180,7 +181,7 @@ public class Tika {
*/
public String detect(InputStream stream, String name) throws IOException {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, name);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
return detect(stream, metadata);
}
diff --git a/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java b/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
index 7135493..194a0a4 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/NameDetector.java
@@ -23,6 +23,7 @@ import java.util.Map;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import static java.nio.charset.StandardCharsets.UTF_8;
@@ -95,7 +96,7 @@ public class NameDetector implements Detector {
*/
public MediaType detect(InputStream input, Metadata metadata) {
// Look for a resource name in the input metadata
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
// If the name is a URL, skip the trailing query
int question = name.indexOf('?');
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
index f832c22..262157e 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
@@ -28,6 +28,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.AutoDetectParser;
@@ -111,7 +112,7 @@ public class ParserContainerExtractor implements ContainerExtractor {
TikaInputStream tis = TikaInputStream.get(stream, tmp);
// Figure out what we have to process
- String filename = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String filename = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
MediaType type = detector.detect(tis, metadata);
if (extractor == null) {
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
index d67f086..d8e6519 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
@@ -27,6 +27,7 @@ import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.DelegatingParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
@@ -64,7 +65,7 @@ public class ParsingEmbeddedDocumentExtractor implements EmbeddedDocumentExtract
FilenameFilter filter = context.get(FilenameFilter.class);
if (filter != null) {
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
return filter.accept(ABSTRACT_PATH, name);
}
@@ -82,7 +83,7 @@ public class ParsingEmbeddedDocumentExtractor implements EmbeddedDocumentExtract
handler.startElement(XHTML, "div", "div", attributes);
}
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null && name.length() > 0 && outputHtml) {
handler.startElement(XHTML, "h1", "h1", new AttributesImpl());
char[] chars = name.toCharArray();
diff --git a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
index d84222b..59caf7b 100644
--- a/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
+++ b/tika-core/src/main/java/org/apache/tika/io/FilenameUtils.java
@@ -83,7 +83,7 @@ public class FilenameUtils {
* <p>
* The goal of this is to get a filename from a path.
* The package parsers and some other embedded doc
- * extractors could put anything into Metadata.RESOURCE_NAME_KEY.
+ * extractors could put anything into TikaCoreProperties.RESOURCE_NAME_KEY.
* <p>
* If a careless client used that filename as if it were a
* filename and not a path when writing embedded files,
diff --git a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
index 8c566c7..1217b36 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
@@ -38,6 +38,7 @@ import java.sql.Blob;
import java.sql.SQLException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* Input stream with extended capabilities. The purpose of this class is
@@ -222,7 +223,7 @@ public class TikaInputStream extends TaggedInputStream {
*/
public static TikaInputStream get(Path path, Metadata metadata)
throws IOException {
- metadata.set(Metadata.RESOURCE_NAME_KEY, path.getFileName().toString());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.getFileName().toString());
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(path)));
return new TikaInputStream(path);
}
@@ -262,7 +263,7 @@ public class TikaInputStream extends TaggedInputStream {
@Deprecated
public static TikaInputStream get(File file, Metadata metadata)
throws FileNotFoundException {
- metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, file.getName());
metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.length()));
return new TikaInputStream(file);
}
@@ -410,7 +411,7 @@ public class TikaInputStream extends TaggedInputStream {
String path = url.getPath();
int slash = path.lastIndexOf('/');
if (slash + 1 < path.length()) { // works even with -1!
- metadata.set(Metadata.RESOURCE_NAME_KEY, path.substring(slash + 1));
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, path.substring(slash + 1));
}
String type = connection.getContentType();
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java b/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java
index 12ac0e5..f4d2552 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java
@@ -24,7 +24,7 @@ package org.apache.tika.metadata;
*/
public interface AccessPermissions {
- final static String PREFIX = "access_permission"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ final static String PREFIX = "access_permission"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
/**
* Can any modifications be made to the document
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Database.java b/tika-core/src/main/java/org/apache/tika/metadata/Database.java
index 7f91a37..bab983b 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Database.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Database.java
@@ -17,7 +17,7 @@ package org.apache.tika.metadata;
* limitations under the License.
*/
public interface Database {
- final static String PREFIX = "database"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ final static String PREFIX = "database"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
Property TABLE_NAME = Property.externalTextBag(PREFIX+"table_name");
Property COLUMN_COUNT = Property.externalText(PREFIX+"column_count");
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java b/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java
index 46fa52b..faa4c3b 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/DublinCore.java
@@ -37,7 +37,7 @@ public interface DublinCore {
* of Internet Media Types [MIME] defining computer media formats).
*/
Property FORMAT = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "format");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "format");
/**
* Recommended best practice is to identify the resource by means of
@@ -48,13 +48,13 @@ public interface DublinCore {
* Book Number (ISBN).
*/
Property IDENTIFIER = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "identifier");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "identifier");
/**
* Date on which the resource was changed.
*/
Property MODIFIED = Property.internalDate(
- PREFIX_DC_TERMS + Metadata.NAMESPACE_PREFIX_DELIMITER + "modified");
+ PREFIX_DC_TERMS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "modified");
/**
* An entity responsible for making contributions to the content of the
@@ -63,7 +63,7 @@ public interface DublinCore {
* indicate the entity.
*/
Property CONTRIBUTOR = Property.internalTextBag(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "contributor");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "contributor");
/**
* The extent or scope of the content of the resource. Coverage will
@@ -76,7 +76,7 @@ public interface DublinCore {
* numeric identifiers such as sets of coordinates or date ranges.
*/
Property COVERAGE = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "coverage");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "coverage");
/**
* An entity primarily responsible for making the content of the resource.
@@ -84,13 +84,13 @@ public interface DublinCore {
* Typically, the name of a Creator should be used to indicate the entity.
*/
Property CREATOR = Property.internalTextBag(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "creator");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "creator");
/**
* Date of creation of the resource.
*/
Property CREATED = Property.internalDate(
- PREFIX_DC_TERMS + Metadata.NAMESPACE_PREFIX_DELIMITER + "created");
+ PREFIX_DC_TERMS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "created");
/**
* A date associated with an event in the life cycle of the resource.
@@ -100,7 +100,7 @@ public interface DublinCore {
* format.
*/
Property DATE = Property.internalDate(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "date");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "date");
/**
* An account of the content of the resource. Description may include
@@ -109,7 +109,7 @@ public interface DublinCore {
* the content.
*/
Property DESCRIPTION = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "description");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "description");
/**
* A language of the intellectual content of the resource. Recommended
@@ -119,7 +119,7 @@ public interface DublinCore {
* "akk" for Akkadian, and "en-GB" for English used in the United Kingdom.
*/
Property LANGUAGE = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "language");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "language");
/**
* An entity responsible for making the resource available. Examples of
@@ -127,7 +127,7 @@ public interface DublinCore {
* the name of a Publisher should be used to indicate the entity.
*/
Property PUBLISHER = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "publisher");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "publisher");
/**
* A reference to a related resource. Recommended best practice is to
@@ -135,7 +135,7 @@ public interface DublinCore {
* a formal identification system.
*/
Property RELATION = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "relation");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "relation");
/**
* Information about rights held in and over the resource. Typically,
@@ -147,7 +147,7 @@ public interface DublinCore {
* other rights with respect to the resource.
*/
Property RIGHTS = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "rights");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "rights");
/**
* A reference to a resource from which the present resource is derived.
@@ -157,7 +157,7 @@ public interface DublinCore {
* system.
*/
Property SOURCE = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "source");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "source");
/**
* The topic of the content of the resource. Typically, a Subject will
@@ -167,14 +167,14 @@ public interface DublinCore {
* scheme.
*/
Property SUBJECT = Property.internalTextBag(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "subject");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "subject");
/**
* A name given to the resource. Typically, a Title will be a name by
* which the resource is formally known.
*/
Property TITLE = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "title");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "title");
/**
* The nature or genre of the content of the resource. Type includes terms
@@ -185,6 +185,6 @@ public interface DublinCore {
* the resource, use the Format element.
*/
Property TYPE = Property.internalText(
- PREFIX_DC + Metadata.NAMESPACE_PREFIX_DELIMITER + "type");
+ PREFIX_DC + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "type");
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/HTML.java b/tika-core/src/main/java/org/apache/tika/metadata/HTML.java
index dd1b0da..5d5b7d3 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/HTML.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/HTML.java
@@ -24,6 +24,6 @@ public interface HTML {
* is set in the embedded document's metadata
*/
Property SCRIPT_SOURCE = Property.internalText(PREFIX_HTML_META +
- Metadata.NAMESPACE_PREFIX_DELIMITER + "scriptSrc");
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "scriptSrc");
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/IPTC.java b/tika-core/src/main/java/org/apache/tika/metadata/IPTC.java
index af51b51..2c7998e 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/IPTC.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/IPTC.java
@@ -92,7 +92,7 @@ public interface IPTC {
* Maps to this IIM property: 2:100 Country/Primary Location Code
*/
Property COUNTRY_CODE = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CountryCode");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CountryCode");
/**
* A textual description, including captions, of the item's content,
@@ -129,7 +129,7 @@ public interface IPTC {
* Maps to this IIM property: 2:04 Object Attribute Reference
*/
Property INTELLECTUAL_GENRE = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "IntellectualGenre");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "IntellectualGenre");
/**
* Keywords to express the subject of the content. Keywords may be free
@@ -173,7 +173,7 @@ public interface IPTC {
* about the IPTC Scene-NewsCodes at www.newscodes.org.
*/
Property SCENE_CODE = Property.internalTextBag(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "Scene");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Scene");
/**
* Specifies one or more Subjects from the IPTC Subject-NewsCodes taxonomy
@@ -185,7 +185,7 @@ public interface IPTC {
* IPTC Subject-NewsCodes at www.newscodes.org.
*/
Property SUBJECT_CODE = Property.internalTextBag(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "SubjectCode");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "SubjectCode");
/**
* Name of a sublocation the content is focussing on -- either the
@@ -204,7 +204,7 @@ public interface IPTC {
* Maps to this IIM property: 2:92 Sublocation
*/
Property SUBLOCATION = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "Location");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Location");
/**
* Designates the date and optionally the time the intellectual content was
@@ -343,7 +343,7 @@ public interface IPTC {
* specification RFC 2426.
*/
Property CREATORS_CONTACT_INFO = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CreatorContactInfo");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CreatorContactInfo");
/**
* Contains the job title of the person who created the content of this
@@ -407,7 +407,7 @@ public interface IPTC {
* Note 2: the ContactInfo naming aligns with the vCard specification RFC 2426.
*/
Property CONTACT_INFO_ADDRESS = Property.internalTextBag(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiAdrExtadr");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiAdrExtadr");
/**
* The contact information city part.
@@ -417,7 +417,7 @@ public interface IPTC {
* Note 2: the ContactInfo naming aligns with the vCard specification RFC 2426.
*/
Property CONTACT_INFO_CITY = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiAdrCity");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiAdrCity");
/**
* The contact information country part.
@@ -427,7 +427,7 @@ public interface IPTC {
* Note 2: the ContactInfo naming aligns with the vCard specification RFC 2426.
*/
Property CONTACT_INFO_COUNTRY = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiAdrCtry");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiAdrCtry");
/**
* The contact information email address part.
@@ -444,7 +444,7 @@ public interface IPTC {
* specification RFC 2426.
*/
Property CONTACT_INFO_EMAIL = Property.internalTextBag(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiEmailWork");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiEmailWork");
/**
* The contact information phone number part.
@@ -461,7 +461,7 @@ public interface IPTC {
* specification RFC 2426.
*/
Property CONTACT_INFO_PHONE = Property.internalTextBag(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiTelWork");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiTelWork");
/**
* The contact information part denoting the local postal code.
@@ -471,7 +471,7 @@ public interface IPTC {
* Note 2: the ContactInfo naming aligns with the vCard specification RFC 2426.
*/
Property CONTACT_INFO_POSTAL_CODE = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiAdrPcode");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiAdrPcode");
/**
* The contact information part denoting regional information such as state or province.
@@ -481,7 +481,7 @@ public interface IPTC {
* Note 2: the ContactInfo naming aligns with the vCard specification RFC 2426.
*/
Property CONTACT_INFO_STATE_PROVINCE = Property.internalText(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiAdrRegion");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiAdrRegion");
/**
* The contact information web address part. Multiple addresses can be given, separated by a comma.
@@ -494,7 +494,7 @@ public interface IPTC {
* specification RFC 2426.
*/
Property CONTACT_INFO_WEB_URL = Property.internalTextBag(
- PREFIX_IPTC_CORE + Metadata.NAMESPACE_PREFIX_DELIMITER + "CiUrlWork");
+ PREFIX_IPTC_CORE + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CiUrlWork");
/**
* As this metadata element pertains to distribution management, it was not
@@ -535,19 +535,19 @@ public interface IPTC {
* Use the Model Age field for the age of model(s).
*/
Property ADDITIONAL_MODEL_INFO = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AddlModelInfo");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AddlModelInfo");
/**
* A set of metadata about artwork or an object in the item
*/
Property ARTWORK_OR_OBJECT = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "ArtworkOrObject");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ArtworkOrObject");
/**
* Code identifying the organisation or company which is featured in the content
*/
Property ORGANISATION_CODE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "OrganisationInImageCode");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "OrganisationInImageCode");
/**
* A term to describe the content of the image by a value from a Controlled
@@ -558,7 +558,7 @@ public interface IPTC {
* IPTC metadata or other user interfaces unless agreed by the IPTC.
*/
Property CONTROLLED_VOCABULARY_TERM = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "CVterm");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CVterm");
/**
* A location the content of the item is about. For photos that is a
@@ -568,7 +568,7 @@ public interface IPTC {
* the property Location Created should be used too.
*/
Property LOCATION_SHOWN = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShown");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShown");
/**
* Age of the human model(s) at the time this image was taken in a model
@@ -578,7 +578,7 @@ public interface IPTC {
* young models. Ages below 18 years should not be included.
*/
Property MODEL_AGE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "ModelAge");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ModelAge");
/**
* Name of the organisation or company which is featured in the content.
@@ -587,14 +587,14 @@ public interface IPTC {
* Organisation Code field.
*/
Property ORGANISATION_NAME = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "OrganisationInImageName");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "OrganisationInImageName");
/**
* Name of a person the content of the item is about. For photos that is a
* person shown in the image.
*/
Property PERSON = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "PersonInImage");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "PersonInImage");
/**
* Globally unique identifier for the item. It is created and applied by the
@@ -625,7 +625,7 @@ public interface IPTC {
* its type and brand can be found in the Exif/technical metadata.
*/
Property DIGITAL_IMAGE_GUID = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "DigImageGUID");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "DigImageGUID");
/**
* The type of the source digital file.
@@ -635,13 +635,13 @@ public interface IPTC {
* @deprecated
*/
Property DIGITAL_SOURCE_FILE_TYPE = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "DigitalSourcefileType");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "DigitalSourcefileType");
/**
* The type of the source of this digital image
*/
Property DIGITAL_SOURCE_TYPE = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "DigitalSourceType");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "DigitalSourceType");
/**
* Names or describes the specific event the content relates to.
@@ -652,7 +652,7 @@ public interface IPTC {
* be named by this property too.
*/
Property EVENT = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "Event");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Event");
/**
* Both a Registry Item Id and a Registry Organisation Id to record any
@@ -666,7 +666,7 @@ public interface IPTC {
* fields should be made mandatory.
*/
Property IMAGE_REGISTRY_ENTRY = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "RegistryId");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "RegistryId");
/**
* Identifies the most recent supplier of the item, who is not necessarily
@@ -681,7 +681,7 @@ public interface IPTC {
* schema.
*/
Property IMAGE_SUPPLIER = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageSupplier");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageSupplier");
/**
* Identifies the most recent supplier of the item, who is not necessarily
@@ -697,12 +697,12 @@ public interface IPTC {
*/
Property IMAGE_SUPPLIER_ID = Property.composite(
Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierID"),
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierID"),
new Property[] { Property.internalText(IPTC.IMAGE_SUPPLIER_ID_WRONG_CASE) });
/** @deprecated use {@link IPTC#IMAGE_SUPPLIER_ID} */
public static final String IMAGE_SUPPLIER_ID_WRONG_CASE =
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierId";
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierId";
/**
* Identifies the most recent supplier of the item, who is not necessarily
@@ -717,7 +717,7 @@ public interface IPTC {
* schema.
*/
Property IMAGE_SUPPLIER_NAME = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierName");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierName");
/**
* Optional identifier assigned by the Image Supplier to the image.
@@ -726,7 +726,7 @@ public interface IPTC {
* schema.
*/
Property IMAGE_SUPPLIER_IMAGE_ID = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierImageID");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageSupplierImageID");
/**
* The date and optionally time when any of the IPTC photo metadata fields
@@ -742,7 +742,7 @@ public interface IPTC {
* property of the XMP Basic scheme.
*/
Property IPTC_LAST_EDITED = Property.internalDate(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "IptcLastEdited");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "IptcLastEdited");
/**
* The location the content of the item was created.
@@ -752,21 +752,21 @@ public interface IPTC {
* used.
*/
Property LOCATION_CREATED = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreated");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreated");
/**
* The maximum available height in pixels of the original photo from which
* this photo has been derived by downsizing.
*/
Property MAX_AVAIL_HEIGHT = Property.internalInteger(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "MaxAvailHeight");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "MaxAvailHeight");
/**
* The maximum available width in pixels of the original photo from which
* this photo has been derived by downsizing.
*/
Property MAX_AVAIL_WIDTH = Property.internalInteger(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "MaxAvailWidth");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "MaxAvailWidth");
/**
* The version number of the PLUS standards in place at the time of the
@@ -777,7 +777,7 @@ public interface IPTC {
* "PLUS Version" should be set to the string "1.2.0"
*/
Property PLUS_VERSION = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "Version");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Version");
/**
* Owner or owners of the copyright in the licensed image.
@@ -789,7 +789,7 @@ public interface IPTC {
* schema.
*/
Property COPYRIGHT_OWNER = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwner");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwner");
/**
* The ID of the owner or owners of the copyright in the licensed image.
@@ -802,12 +802,12 @@ public interface IPTC {
*/
Property COPYRIGHT_OWNER_ID = Property.composite(
Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwnerID"),
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwnerID"),
new Property[] { Property.internalTextBag(IPTC.COPYRIGHT_OWNER_ID_WRONG_CASE) });
/** @deprecated use {@link IPTC#COPYRIGHT_OWNER_ID} */
public static final String COPYRIGHT_OWNER_ID_WRONG_CASE =
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwnerId";
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwnerId";
/**
* The name of the owner or owners of the copyright in the licensed image.
@@ -819,7 +819,7 @@ public interface IPTC {
* schema.
*/
Property COPYRIGHT_OWNER_NAME = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwnerName");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CopyrightOwnerName");
/**
* Creator or creators of the image.
@@ -834,7 +834,7 @@ public interface IPTC {
* schema.
*/
Property IMAGE_CREATOR = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageCreator");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageCreator");
/**
* The ID of the creator or creators of the image.
@@ -850,12 +850,12 @@ public interface IPTC {
*/
Property IMAGE_CREATOR_ID = Property.composite(
Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageCreatorID"),
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageCreatorID"),
new Property[] { Property.internalTextBag(IPTC.IMAGE_CREATOR_ID_WRONG_CASE) });
/** @deprecated use {@link IPTC#IMAGE_CREATOR_ID} */
public static final String IMAGE_CREATOR_ID_WRONG_CASE =
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageCreatorId";
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageCreatorId";
/**
* The name of the creator or creators of the image.
@@ -870,7 +870,7 @@ public interface IPTC {
* schema.
*/
Property IMAGE_CREATOR_NAME = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ImageCreatorName");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ImageCreatorName");
/**
* A person or company that should be contacted to obtain a licence for
@@ -880,7 +880,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "Licensor");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Licensor");
/**
* The ID of the person or company that should be contacted to obtain a licence for
@@ -891,12 +891,12 @@ public interface IPTC {
*/
Property LICENSOR_ID = Property.composite(
Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorID"),
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorID"),
new Property[] { Property.internalTextBag(IPTC.LICENSOR_ID_WRONG_CASE) });
/** @deprecated use {@link IPTC#LICENSOR_ID} */
public static final String LICENSOR_ID_WRONG_CASE =
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorId";
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorId";
/**
* The name of the person or company that should be contacted to obtain a licence for
@@ -906,7 +906,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_NAME = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorName");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorName");
/**
* The city of a person or company that should be contacted to obtain a licence for
@@ -916,7 +916,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_CITY = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorCity");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorCity");
/**
* The country of a person or company that should be contacted to obtain a licence for
@@ -926,7 +926,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_COUNTRY = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorCountry");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorCountry");
/**
* The email of a person or company that should be contacted to obtain a licence for
@@ -936,7 +936,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_EMAIL = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorEmail");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorEmail");
/**
* The extended address of a person or company that should be contacted to obtain a licence for
@@ -946,7 +946,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_EXTENDED_ADDRESS = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorExtendedAddress");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorExtendedAddress");
/**
* The postal code of a person or company that should be contacted to obtain a licence for
@@ -956,7 +956,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_POSTAL_CODE = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorPostalCode");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorPostalCode");
/**
* The region of a person or company that should be contacted to obtain a licence for
@@ -966,7 +966,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_REGION = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorRegion");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorRegion");
/**
* The street address of a person or company that should be contacted to obtain a licence for
@@ -976,7 +976,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_STREET_ADDRESS = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorStreetAddress");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorStreetAddress");
/**
* The phone number of a person or company that should be contacted to obtain a licence for
@@ -986,7 +986,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_TELEPHONE_1 = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorTelephone1");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorTelephone1");
/**
* The phone number of a person or company that should be contacted to obtain a licence for
@@ -996,7 +996,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_TELEPHONE_2 = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorTelephone2");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorTelephone2");
/**
* The URL of a person or company that should be contacted to obtain a licence for
@@ -1006,7 +1006,7 @@ public interface IPTC {
* schema.
*/
Property LICENSOR_URL = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "LicensorURL");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LicensorURL");
/**
* Age of the youngest model pictured in the image, at the time that the
@@ -1020,7 +1020,7 @@ public interface IPTC {
* schema.
*/
Property MINOR_MODEL_AGE_DISCLOSURE = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "MinorModelAgeDisclosure");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "MinorModelAgeDisclosure");
/**
* Optional identifier associated with each Model Release.
@@ -1029,7 +1029,7 @@ public interface IPTC {
* schema.
*/
Property MODEL_RELEASE_ID = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ModelReleaseID");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ModelReleaseID");
/**
* Summarizes the availability and scope of model releases authorizing usage
@@ -1043,7 +1043,7 @@ public interface IPTC {
* schema.
*/
Property MODEL_RELEASE_STATUS = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "ModelReleaseStatus");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ModelReleaseStatus");
/**
* Optional identifier associated with each Property Release.
@@ -1052,7 +1052,7 @@ public interface IPTC {
* schema.
*/
Property PROPERTY_RELEASE_ID = Property.internalTextBag(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "PropertyReleaseID");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "PropertyReleaseID");
/**
* Summarises the availability and scope of property releases authorizing
@@ -1065,7 +1065,7 @@ public interface IPTC {
* schema.
*/
Property PROPERTY_RELEASE_STATUS = Property.internalText(
- PREFIX_PLUS + Metadata.NAMESPACE_PREFIX_DELIMITER + "PropertyReleaseStatus");
+ PREFIX_PLUS + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "PropertyReleaseStatus");
/**
* Contains any necessary copyright notice for claiming the intellectual
@@ -1074,13 +1074,13 @@ public interface IPTC {
* property rights.
*/
Property ARTWORK_OR_OBJECT_DETAIL_COPYRIGHT_NOTICE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AOCopyrightNotice");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AOCopyrightNotice");
/**
* Contains the name of the artist who has created artwork or an object in the image.
*/
Property ARTWORK_OR_OBJECT_DETAIL_CREATOR = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AOCreator");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AOCreator");
/**
* Designates the date and optionally the time the artwork or object in the
@@ -1088,34 +1088,34 @@ public interface IPTC {
* intellectual property rights.
*/
Property ARTWORK_OR_OBJECT_DETAIL_DATE_CREATED = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AODateCreated");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AODateCreated");
/**
* The organisation or body holding and registering the artwork or object in
* the image for inventory purposes.
*/
Property ARTWORK_OR_OBJECT_DETAIL_SOURCE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AOSource");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AOSource");
/**
* The inventory number issued by the organisation or body holding and
* registering the artwork or object in the image.
*/
Property ARTWORK_OR_OBJECT_DETAIL_SOURCE_INVENTORY_NUMBER = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AOSourceInvNo");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AOSourceInvNo");
/**
* A reference for the artwork or object in the image.
*/
Property ARTWORK_OR_OBJECT_DETAIL_TITLE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "AOTitle");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AOTitle");
/**
* Name of the city of a location. This element is at the fourth level of a
* top-down geographical hierarchy.
*/
Property LOCATION_SHOWN_CITY = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShownCity");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShownCity");
/**
* The ISO code of a country of a location. This element is at the second
@@ -1126,14 +1126,14 @@ public interface IPTC {
* scheme as no explicit indication can be provided.
*/
Property LOCATION_SHOWN_COUNTRY_CODE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShownCountryCode");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShownCountryCode");
/**
* The name of a country of a location. This element is at the second level
* of a top-down geographical hierarchy.
*/
Property LOCATION_SHOWN_COUNTRY_NAME = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShownCountryName");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShownCountryName");
/**
* The name of a subregion of a country - a province or state - of a
@@ -1141,7 +1141,7 @@ public interface IPTC {
* hierarchy.
*/
Property LOCATION_SHOWN_PROVINCE_OR_STATE = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShownProvinceState");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShownProvinceState");
/**
* Name of a sublocation. This sublocation name could either be the name of
@@ -1150,21 +1150,21 @@ public interface IPTC {
* element is at the fifth level of a top-down geographical hierarchy.
*/
Property LOCATION_SHOWN_SUBLOCATION = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShownSublocation");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShownSublocation");
/**
* The name of a world region of a location. This element is at the first
* (top) level of a top-down geographical hierarchy.
*/
Property LOCATION_SHOWN_WORLD_REGION = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationShownWorldRegion");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationShownWorldRegion");
/**
* Name of the city of a location. This element is at the fourth level of a
* top-down geographical hierarchy.
*/
Property LOCATION_CREATED_CITY = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedCity");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedCity");
/**
* The ISO code of a country of a location. This element is at the second
@@ -1175,14 +1175,14 @@ public interface IPTC {
* scheme as no explicit indication can be provided.
*/
Property LOCATION_CREATED_COUNTRY_CODE = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedCountryCode");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedCountryCode");
/**
* The name of a country of a location. This element is at the second level
* of a top-down geographical hierarchy.
*/
Property LOCATION_CREATED_COUNTRY_NAME = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedCountryName");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedCountryName");
/**
* The name of a subregion of a country - a province or state - of a
@@ -1190,7 +1190,7 @@ public interface IPTC {
* hierarchy.
*/
Property LOCATION_CREATED_PROVINCE_OR_STATE = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedProvinceState");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedProvinceState");
/**
* Name of a sublocation. This sublocation name could either be the name of
@@ -1199,14 +1199,14 @@ public interface IPTC {
* element is at the fifth level of a top-down geographical hierarchy.
*/
Property LOCATION_CREATED_SUBLOCATION = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedSublocation");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedSublocation");
/**
* The name of a world region of a location. This element is at the first
* (top) level of a top-down geographical hierarchy.
*/
Property LOCATION_CREATED_WORLD_REGION = Property.internalText(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedWorldRegion");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LocationCreatedWorldRegion");
/**
* A unique identifier created by a registry and applied by the creator of
@@ -1214,13 +1214,13 @@ public interface IPTC {
* identifier is linked to a corresponding Registry Organisation Identifier.
*/
Property REGISTRY_ENTRY_CREATED_ITEM_ID = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "RegItemId");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "RegItemId");
/**
* An identifier for the registry which issued the corresponding Registry Image Id.
*/
Property REGISTRY_ENTRY_CREATED_ORGANISATION_ID = Property.internalTextBag(
- PREFIX_IPTC_EXT + Metadata.NAMESPACE_PREFIX_DELIMITER + "RegOrgId");
+ PREFIX_IPTC_EXT + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "RegOrgId");
Property[] PROPERTY_GROUP_IPTC_CORE = new Property[] {
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java b/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
deleted file mode 100644
index 886aa43..0000000
--- a/tika-core/src/main/java/org/apache/tika/metadata/MSOffice.java
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.metadata;
-
-/**
- * A collection of Microsoft Office and Open Document property names.
- *
- * This is being replaced with cleaner, better defined properties in
- * {@link Office}.
- */
-public interface MSOffice {
-
- @Deprecated String KEYWORDS = "Keywords";
-
- @Deprecated String COMMENTS = "Comments";
-
- @Deprecated String LAST_AUTHOR = "Last-Author";
-
- @Deprecated String AUTHOR = "Author";
-
- @Deprecated String APPLICATION_NAME = "Application-Name";
-
- @Deprecated String REVISION_NUMBER = "Revision-Number";
-
- @Deprecated String TEMPLATE = "Template";
-
- @Deprecated String TOTAL_TIME = "Total-Time";
-
- @Deprecated String PRESENTATION_FORMAT = "Presentation-Format";
-
- @Deprecated String NOTES = "Notes";
-
- @Deprecated String MANAGER = "Manager";
-
- @Deprecated String APPLICATION_VERSION = "Application-Version";
-
- @Deprecated String VERSION = "Version";
-
- @Deprecated String CONTENT_STATUS = "Content-Status";
-
- @Deprecated String CATEGORY = "Category";
-
- @Deprecated String COMPANY = "Company";
-
- @Deprecated String SECURITY = "Security";
-
-
- /** The number of Slides are there in the (presentation) document */
- @Deprecated Property SLIDE_COUNT =
- Property.internalInteger("Slide-Count");
-
- /** The number of Pages are there in the (paged) document */
- @Deprecated Property PAGE_COUNT =
- Property.internalInteger("Page-Count");
-
- /** The number of individual Paragraphs in the document */
- @Deprecated Property PARAGRAPH_COUNT =
- Property.internalInteger("Paragraph-Count");
-
- /** The number of lines in the document */
- @Deprecated Property LINE_COUNT =
- Property.internalInteger("Line-Count");
-
- /** The number of Words in the document */
- @Deprecated Property WORD_COUNT =
- Property.internalInteger("Word-Count");
-
- /** The number of Characters in the document */
- @Deprecated Property CHARACTER_COUNT =
- Property.internalInteger("Character Count");
-
- /** The number of Characters in the document, including spaces */
- @Deprecated Property CHARACTER_COUNT_WITH_SPACES =
- Property.internalInteger("Character-Count-With-Spaces");
-
- /** The number of Tables in the document */
- @Deprecated Property TABLE_COUNT =
- Property.internalInteger("Table-Count");
-
- /** The number of Images in the document */
- @Deprecated Property IMAGE_COUNT =
- Property.internalInteger("Image-Count");
-
- /**
- * The number of Objects in the document.
- * This is typically non-Image resources embedded in the
- * document, such as other documents or non-Image media.
- */
- @Deprecated Property OBJECT_COUNT =
- Property.internalInteger("Object-Count");
-
-
- /** How long has been spent editing the document? */
- String EDIT_TIME = "Edit-Time";
-
- /** When was the document created? */
- @Deprecated Property CREATION_DATE =
- Property.internalDate("Creation-Date");
-
- /** When was the document last saved? */
- @Deprecated Property LAST_SAVED =
- Property.internalDate("Last-Save-Date");
-
- /** When was the document last printed? */
- @Deprecated Property LAST_PRINTED =
- Property.internalDate("Last-Printed");
-
- /**
- * For user defined metadata entries in the document,
- * what prefix should be attached to the key names.
- * eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
- */
- String USER_DEFINED_METADATA_NAME_PREFIX = "custom:";
-}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Message.java b/tika-core/src/main/java/org/apache/tika/metadata/Message.java
index 38bd701..07fc52a 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Message.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Message.java
@@ -22,9 +22,9 @@ package org.apache.tika.metadata;
* See also {@link Office}'s MAPI-specific properties.
*/
public interface Message {
- String MESSAGE_PREFIX = "Message"+ Metadata.NAMESPACE_PREFIX_DELIMITER;
+ String MESSAGE_PREFIX = "Message"+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
- String MESSAGE_RAW_HEADER_PREFIX = MESSAGE_PREFIX+"Raw-Header"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ String MESSAGE_RAW_HEADER_PREFIX = MESSAGE_PREFIX+"Raw-Header"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
String MESSAGE_RECIPIENT_ADDRESS = "Message-Recipient-Address";
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java b/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
index 18c0cf0..80666ba 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Metadata.java
@@ -42,7 +42,7 @@ import org.apache.tika.utils.DateUtils;
* A multi-valued metadata container.
*/
public class Metadata implements CreativeCommons, Geographic, HttpHeaders,
- Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
+ Message, ClimateForcast, TIFF, TikaMimeKeys,
Serializable {
/** Serial version UID */
@@ -53,43 +53,7 @@ public class Metadata implements CreativeCommons, Geographic, HttpHeaders,
*/
private Map<String, String[]> metadata = null;
- /**
- * The common delimiter used between the namespace abbreviation and the property name
- */
- public static final String NAMESPACE_PREFIX_DELIMITER = ":";
-
- /** @deprecated use TikaCoreProperties#FORMAT */
- public static final String FORMAT = "format";
- /** @deprecated use TikaCoreProperties#IDENTIFIER */
- public static final String IDENTIFIER = "identifier";
- /** @deprecated use TikaCoreProperties#MODIFIED */
- public static final String MODIFIED = "modified";
- /** @deprecated use TikaCoreProperties#CONTRIBUTOR */
- public static final String CONTRIBUTOR = "contributor";
- /** @deprecated use TikaCoreProperties#COVERAGE */
- public static final String COVERAGE = "coverage";
- /** @deprecated use TikaCoreProperties#CREATOR */
- public static final String CREATOR = "creator";
- /** @deprecated use TikaCoreProperties#CREATED */
- public static final Property DATE = Property.internalDate("date");
- /** @deprecated use TikaCoreProperties#DESCRIPTION */
- public static final String DESCRIPTION = "description";
- /** @deprecated use TikaCoreProperties#LANGUAGE */
- public static final String LANGUAGE = "language";
- /** @deprecated use TikaCoreProperties#PUBLISHER */
- public static final String PUBLISHER = "publisher";
- /** @deprecated use TikaCoreProperties#RELATION */
- public static final String RELATION = "relation";
- /** @deprecated use TikaCoreProperties#RIGHTS */
- public static final String RIGHTS = "rights";
- /** @deprecated use TikaCoreProperties#SOURCE */
- public static final String SOURCE = "source";
- /** @deprecated use TikaCoreProperties#KEYWORDS */
- public static final String SUBJECT = "subject";
- /** @deprecated use TikaCoreProperties#TITLE */
- public static final String TITLE = "title";
- /** @deprecated use TikaCoreProperties#TYPE */
- public static final String TYPE = "type";
+
/**
* Some parsers will have the date as a ISO-8601 string
@@ -298,16 +262,30 @@ public class Metadata implements CreativeCommons, Geographic, HttpHeaders,
* the metadata value.
*/
public void add(final Property property, final String value) {
- String[] values = metadata.get(property.getName());
- if (values == null) {
- set(property, value);
+
+ if (property == null) {
+ throw new NullPointerException("property must not be null");
+ }
+ if (property.getPropertyType() == PropertyType.COMPOSITE) {
+ add(property.getPrimaryProperty(), value);
+ if (property.getSecondaryExtractProperties() != null) {
+ for (Property secondaryExtractProperty : property.getSecondaryExtractProperties()) {
+ add(secondaryExtractProperty, value);
+ }
+ }
} else {
- if (property.isMultiValuePermitted()) {
- set(property, appendedValues(values, value));
- } else {
- throw new PropertyTypeException(property.getName() +
- " : " + property.getPropertyType());
- }
+ String[] values = metadata.get(property.getName());
+
+ if (values == null) {
+ set(property, value);
+ } else {
+ if (property.isMultiValuePermitted()) {
+ set(property, appendedValues(values, value));
+ } else {
+ throw new PropertyTypeException(property.getName() +
+ " : " + property.getPropertyType());
+ }
+ }
}
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Office.java b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
index a02db76..5c5af2e 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Office.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Office.java
@@ -22,9 +22,6 @@ package org.apache.tika.metadata;
* to) MS Office and OpenDocument formats.
* This is a logical collection of properties, which may be drawn from a
* few different external definitions.
- *
- * Note that some of the legacy properties from the {@link MSOffice}
- * collection still need to be migrated over
*
* @since Apache Tika 1.2
*/
@@ -40,103 +37,107 @@ public interface Office {
*/
public static final String USER_DEFINED_METADATA_NAME_PREFIX = "custom:";
-
+
/**
- * Keywords pertaining to a document.
+ * Keywords pertaining to a document. Also populates {@link DublinCore#SUBJECT}.
*/
- Property KEYWORDS = Property.internalTextBag(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "keyword");
+ Property KEYWORDS = Property.composite(
+ Property.internalTextBag(
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "keyword"),
+ new Property[] {
+ DublinCore.SUBJECT,
+ });
/**
* Name of the initial creator/author of a document
*/
Property INITIAL_AUTHOR = Property.internalText(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "initial-author");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "initial-author");
/**
* Name of the last (most recent) author of a document
*/
Property LAST_AUTHOR = Property.internalText(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "last-author");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "last-author");
/**
* Name of the principal author(s) of a document
*/
Property AUTHOR = Property.internalTextBag(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "author");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "author");
/** When was the document created? */
Property CREATION_DATE = Property.internalDate(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "creation-date");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "creation-date");
/** When was the document last saved? */
Property SAVE_DATE = Property.internalDate(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "save-date");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "save-date");
/** When was the document last printed? */
Property PRINT_DATE = Property.internalDate(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "print-date");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "print-date");
/** The number of Slides in the (presentation) document */
Property SLIDE_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "slide-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "slide-count");
/** The number of Pages in the (paged) document */
Property PAGE_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "page-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "page-count");
/** The number of individual Paragraphs in the document */
Property PARAGRAPH_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "paragraph-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "paragraph-count");
/** The number of lines in the document */
Property LINE_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "line-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "line-count");
/** The number of Words in the document */
Property WORD_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "word-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "word-count");
/** The number of Characters in the document */
Property CHARACTER_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "character-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "character-count");
/** The number of Characters in the document, including spaces */
Property CHARACTER_COUNT_WITH_SPACES = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "character-count-with-spaces");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "character-count-with-spaces");
/** The number of Tables in the document */
Property TABLE_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "table-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "table-count");
/** The number of Images in the document */
Property IMAGE_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "image-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "image-count");
/**
* The number of Objects in the document. These are typically non-Image resources
* embedded in the document, such as other documents or non-Image media.
*/
Property OBJECT_COUNT = Property.internalInteger(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "object-count");
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "object-count");
/**
* MAPI message class. What type of .msg/MAPI file is it?
*/
Property MAPI_MESSAGE_CLASS = Property.internalClosedChoise(
- PREFIX_DOC_META + Metadata.NAMESPACE_PREFIX_DELIMITER + "mapi-message-class",
+ PREFIX_DOC_META + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "mapi-message-class",
"APPOINTMENT", "CONTACT", "NOTE", "STICKY_NOTE", "POST", "TASK", "UNKNOWN", "UNSPECIFIED" );
Property MAPI_SENT_BY_SERVER_TYPE = Property.internalText(
- PREFIX_DOC_META+Metadata.NAMESPACE_PREFIX_DELIMITER+"mapi-sent-by-server-type");
+ PREFIX_DOC_META+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"mapi-sent-by-server-type");
Property MAPI_FROM_REPRESENTING_NAME = Property.internalText(
- PREFIX_DOC_META+Metadata.NAMESPACE_PREFIX_DELIMITER+"mapi-from-representing-name");
+ PREFIX_DOC_META+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"mapi-from-representing-name");
Property MAPI_FROM_REPRESENTING_EMAIL = Property.internalText(
- PREFIX_DOC_META+Metadata.NAMESPACE_PREFIX_DELIMITER+"mapi-from-representing-email");
+ PREFIX_DOC_META+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"mapi-from-representing-email");
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLCore.java b/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLCore.java
index 1a803b8..577bbce 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLCore.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLCore.java
@@ -36,41 +36,44 @@ public interface OfficeOpenXMLCore
* A categorization of the content of this package.
*/
Property CATEGORY = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "category");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "category");
/**
* The status of the content.
*/
Property CONTENT_STATUS = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "contentStatus");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "contentStatus");
/**
* The user who performed the last modification. The identification is environment-specific.
*/
Property LAST_MODIFIED_BY = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "lastModifiedBy");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "lastModifiedBy");
/**
* The date and time of the last printing.
*/
Property LAST_PRINTED = Property.externalDate(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "lastPrinted");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "lastPrinted");
/**
* The revision number.
*/
Property REVISION = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "revision");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "revision");
/**
* The version number. This value is set by the user or by the application.
*/
Property VERSION = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "version");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "version");
/**
- * The document's subject.
+ * The document's subject. Also populates {@link DublinCore#SUBJECT}
*/
- Property SUBJECT = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "subject");
+ Property SUBJECT = Property.composite(Property.externalText(
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "subject"),
+ new Property[] {
+ DublinCore.SUBJECT,
+ });
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLExtended.java b/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLExtended.java
index 5829339..0557247 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLExtended.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/OfficeOpenXMLExtended.java
@@ -36,34 +36,34 @@ public interface OfficeOpenXMLExtended
String WORD_PROCESSING_PREFIX = "w";
Property TEMPLATE = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "Template");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Template");
Property MANAGER = Property.externalTextBag(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "Manager");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Manager");
- Property COMPANY = Property.externalText( PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "Company");
+ Property COMPANY = Property.externalText( PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Company");
Property PRESENTATION_FORMAT = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "PresentationFormat");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "PresentationFormat");
Property NOTES = Property.externalInteger(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "Notes");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Notes");
Property TOTAL_TIME = Property.externalInteger(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "TotalTime");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "TotalTime");
Property HIDDEN_SLIDES = Property.externalInteger(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "HiddedSlides");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "HiddedSlides");
Property APPLICATION = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "Application");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Application");
Property APP_VERSION = Property.externalText(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "AppVersion");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AppVersion");
Property DOC_SECURITY = Property.externalInteger(
- PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "DocSecurity");
+ PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "DocSecurity");
Property COMMENTS = Property.externalTextBag(
- WORD_PROCESSING_PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER + "comments");
+ WORD_PROCESSING_PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Comments");
}
\ No newline at end of file
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
index 90b1fc0..b08643e 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
@@ -23,19 +23,19 @@ package org.apache.tika.metadata;
*/
public interface PDF {
- String PDF_PREFIX = "pdf"+Metadata.NAMESPACE_PREFIX_DELIMITER;
- String PDFA_PREFIX = "pdfa"+Metadata.NAMESPACE_PREFIX_DELIMITER;
- String PDFAID_PREFIX = "pdfaid"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ String PDF_PREFIX = "pdf"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+ String PDFA_PREFIX = "pdfa"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+ String PDFAID_PREFIX = "pdfaid"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
/**
* Prefix to be used for properties that record what was stored
* in the docinfo section (as opposed to XMP)
*/
String PDF_DOC_INFO_PREFIX = PDF_PREFIX + "docinfo" +
- Metadata.NAMESPACE_PREFIX_DELIMITER;
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
String PDF_DOC_INFO_CUSTOM_PREFIX = PDF_DOC_INFO_PREFIX+"custom"+
- Metadata.NAMESPACE_PREFIX_DELIMITER;
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
Property DOC_INFO_CREATED = Property.internalDate(PDF_DOC_INFO_PREFIX + "created");
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/Photoshop.java b/tika-core/src/main/java/org/apache/tika/metadata/Photoshop.java
index 76bd4d9..0e12a2f 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/Photoshop.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/Photoshop.java
@@ -36,53 +36,53 @@ public interface Photoshop {
String PREFIX_PHOTOSHOP = "photoshop";
Property AUTHORS_POSITION = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "AuthorsPosition");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "AuthorsPosition");
// TODO Replace this with proper indexed choices support
String[] _COLOR_MODE_CHOICES_INDEXED = { "Bitmap", "Greyscale", "Indexed Colour",
"RGB Color", "CMYK Colour", "Multi-Channel", "Duotone", "LAB Colour",
"reserved", "reserved", "YCbCr Colour", "YCgCo Colour", "YCbCrK Colour"};
Property COLOR_MODE = Property.internalClosedChoise(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "ColorMode",
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ColorMode",
_COLOR_MODE_CHOICES_INDEXED);
Property CAPTION_WRITER = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "CaptionWriter");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "CaptionWriter");
Property CATEGORY = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Category");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Category");
Property CITY = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "City");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "City");
Property COUNTRY = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Country");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Country");
Property CREDIT = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Credit");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Credit");
Property DATE_CREATED = Property.internalDate(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "DateCreated");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "DateCreated");
Property HEADLINE = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Headline");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Headline");
Property INSTRUCTIONS = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Instructions");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Instructions");
Property SOURCE = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Source");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Source");
Property STATE = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "State");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "State");
Property SUPPLEMENTAL_CATEGORIES = Property.internalTextBag(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "SupplementalCategories");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "SupplementalCategories");
Property TRANSMISSION_REFERENCE = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "TransmissionReference");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "TransmissionReference");
Property URGENCY = Property.internalText(
- PREFIX_PHOTOSHOP + Metadata.NAMESPACE_PREFIX_DELIMITER + "Urgency");
+ PREFIX_PHOTOSHOP + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Urgency");
}
\ No newline at end of file
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/QuattroPro.java b/tika-core/src/main/java/org/apache/tika/metadata/QuattroPro.java
index a106e08..1914809 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/QuattroPro.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/QuattroPro.java
@@ -26,23 +26,23 @@ public interface QuattroPro {
*/
Property ID = Property.internalText(
QUATTROPRO_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "Id");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Id");
/**
* Version.
*/
Property VERSION = Property.internalInteger(
QUATTROPRO_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "Version");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Version");
/**
* Build.
*/
Property BUILD = Property.internalInteger(
QUATTROPRO_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "Build");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Build");
/**
* Lowest version.
*/
Property LOWEST_VERSION = Property.internalInteger(
QUATTROPRO_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "LowestVersion");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LowestVersion");
}
\ No newline at end of file
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/RTFMetadata.java b/tika-core/src/main/java/org/apache/tika/metadata/RTFMetadata.java
index e2c1471..b887288 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/RTFMetadata.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/RTFMetadata.java
@@ -25,22 +25,22 @@ RTFMetadata {
* any time a pict/emf/wmf is in an object
*/
Property THUMBNAIL = Property.internalBoolean(PREFIX_RTF_META+
- Metadata.NAMESPACE_PREFIX_DELIMITER+"thumbnail");
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"thumbnail");
/**
* if an application and version is given as part of the
* embedded object, this is the literal string
*/
Property EMB_APP_VERSION = Property.internalText(PREFIX_RTF_META+
- Metadata.NAMESPACE_PREFIX_DELIMITER+"emb_app_version");
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"emb_app_version");
Property EMB_CLASS = Property.internalText(PREFIX_RTF_META+
- Metadata.NAMESPACE_PREFIX_DELIMITER+"emb_class");
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"emb_class");
Property EMB_TOPIC = Property.internalText(PREFIX_RTF_META+
- Metadata.NAMESPACE_PREFIX_DELIMITER+"emb_topic");
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"emb_topic");
Property EMB_ITEM = Property.internalText(PREFIX_RTF_META+
- Metadata.NAMESPACE_PREFIX_DELIMITER+"emb_item");
+ TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER+"emb_item");
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
index e97562d..420badf 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
@@ -60,19 +60,24 @@ public interface TikaCoreProperties {
};
/**
+ * The common delimiter used between the namespace abbreviation and the property name
+ */
+ String NAMESPACE_PREFIX_DELIMITER = ":";
+
+ /**
* Use this to prefix metadata properties that store information
* about the parsing process. Users should be able to distinguish
* between metadata that was contained within the document and
* metadata about the parsing process.
* In Tika 2.0 (or earlier?), let's change X-ParsedBy to X-TIKA-Parsed-By.
*/
- public static String TIKA_META_PREFIX = "X-TIKA"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ public static String TIKA_META_PREFIX = "X-TIKA"+NAMESPACE_PREFIX_DELIMITER;
/**
* Use this to store parse exception information in the Metadata object.
*/
public static String TIKA_META_EXCEPTION_PREFIX = TIKA_META_PREFIX+"EXCEPTION"+
- Metadata.NAMESPACE_PREFIX_DELIMITER;
+ NAMESPACE_PREFIX_DELIMITER;
/**
* Use this to store exceptions caught during a parse that are
@@ -92,6 +97,16 @@ public interface TikaCoreProperties {
Property.internalTextBag(TIKA_META_EXCEPTION_PREFIX+"embedded_stream_exception");
+ String RESOURCE_NAME_KEY = "resourceName";
+
+ String PROTECTED = "protected";
+
+ String EMBEDDED_RELATIONSHIP_ID = "embeddedRelationshipId";
+
+ String EMBEDDED_STORAGE_CLASS_ID = "embeddedStorageClassId";
+
+ String EMBEDDED_RESOURCE_TYPE_KEY = "embeddedResourceType";
+
/**
* Some file formats can store information about their original
* file name/location or about their attachment's original file name/location.
@@ -106,7 +121,7 @@ public interface TikaCoreProperties {
, or the value might come from outside the document. This information
* may be faulty and should be treated only as a hint.
*/
- public static final Property CONTENT_TYPE_HINT =
+ Property CONTENT_TYPE_HINT =
Property.internalText(HttpHeaders.CONTENT_TYPE+"-Hint");
Property CONTENT_TYPE_OVERRIDE =
@@ -115,144 +130,118 @@ public interface TikaCoreProperties {
/**
* @see DublinCore#FORMAT
*/
- public static final Property FORMAT = Property.composite(DublinCore.FORMAT,
- new Property[] { Property.internalText(Metadata.FORMAT) });
+ Property FORMAT = DublinCore.FORMAT;
- /**
+ /**
* @see DublinCore#IDENTIFIER
*/
- public static final Property IDENTIFIER = Property.composite(DublinCore.IDENTIFIER,
- new Property[] { Property.internalText(Metadata.IDENTIFIER) });
+ Property IDENTIFIER = DublinCore.IDENTIFIER;
- /**
+ /**
* @see DublinCore#CONTRIBUTOR
*/
- public static final Property CONTRIBUTOR = Property.composite(DublinCore.CONTRIBUTOR,
- new Property[] { Property.internalText(Metadata.CONTRIBUTOR) });
+ Property CONTRIBUTOR = DublinCore.CONTRIBUTOR;
/**
* @see DublinCore#COVERAGE
*/
- public static final Property COVERAGE = Property.composite(DublinCore.COVERAGE,
- new Property[] { Property.internalText(Metadata.COVERAGE) });
+ Property COVERAGE = DublinCore.COVERAGE;
/**
* @see DublinCore#CREATOR
*/
- public static final Property CREATOR = Property.composite(DublinCore.CREATOR,
+ Property CREATOR = Property.composite(DublinCore.CREATOR,
new Property[] {
Office.AUTHOR,
- Property.internalTextBag(Metadata.CREATOR),
- Property.internalTextBag(Metadata.AUTHOR)
});
/**
* @see Office#LAST_AUTHOR
*/
- public static final Property MODIFIER = Property.composite(Office.LAST_AUTHOR,
- new Property[] { Property.internalText(Metadata.LAST_AUTHOR) });
+ Property MODIFIER = Office.LAST_AUTHOR;
/**
* @see XMP#CREATOR_TOOL
*/
- public static final Property CREATOR_TOOL = XMP.CREATOR_TOOL;
+ Property CREATOR_TOOL = XMP.CREATOR_TOOL;
/**
* @see DublinCore#LANGUAGE
*/
- public static final Property LANGUAGE = Property.composite(DublinCore.LANGUAGE,
- new Property[] { Property.internalText(Metadata.LANGUAGE) });
+ Property LANGUAGE = DublinCore.LANGUAGE;
/**
* @see DublinCore#PUBLISHER
*/
- public static final Property PUBLISHER = Property.composite(DublinCore.PUBLISHER,
- new Property[] { Property.internalText(Metadata.PUBLISHER) });
+ Property PUBLISHER = DublinCore.PUBLISHER;
/**
* @see DublinCore#RELATION
*/
- public static final Property RELATION = Property.composite(DublinCore.RELATION,
- new Property[] { Property.internalText(Metadata.RELATION) });
-
+ Property RELATION = DublinCore.RELATION;
+
/**
* @see DublinCore#RIGHTS
*/
- public static final Property RIGHTS = Property.composite(DublinCore.RIGHTS,
- new Property[] { Property.internalText(Metadata.RIGHTS) });
+ Property RIGHTS = DublinCore.RIGHTS;
/**
* @see DublinCore#SOURCE
*/
- public static final Property SOURCE = Property.composite(DublinCore.SOURCE,
- new Property[] { Property.internalText(Metadata.SOURCE) });
+ Property SOURCE = DublinCore.SOURCE;
/**
* @see DublinCore#TYPE
*/
- public static final Property TYPE = Property.composite(DublinCore.TYPE,
- new Property[] { Property.internalText(Metadata.TYPE) });
-
+ Property TYPE = DublinCore.TYPE;
// Descriptive properties
/**
* @see DublinCore#TITLE
*/
- public static final Property TITLE = Property.composite(DublinCore.TITLE,
- new Property[] { Property.internalText(Metadata.TITLE) });
+ Property TITLE = DublinCore.TITLE;
/**
* @see DublinCore#DESCRIPTION
*/
- public static final Property DESCRIPTION = Property.composite(DublinCore.DESCRIPTION,
- new Property[] { Property.internalText(Metadata.DESCRIPTION) });
+ Property DESCRIPTION = DublinCore.DESCRIPTION;
/**
- * @see DublinCore#SUBJECT
- * @see Office#KEYWORDS
+ * Alias of {@link DublinCore#SUBJECT}; should include both subject and keywords
+ * if a document format has both. See also {@link Office#KEYWORDS}
+ * and {@link OfficeOpenXMLCore#SUBJECT}.
*/
- public static final Property KEYWORDS = Property.composite(DublinCore.SUBJECT,
- new Property[] {
- Office.KEYWORDS,
- Property.internalTextBag(MSOffice.KEYWORDS),
- Property.internalTextBag(Metadata.SUBJECT)
- });
-
+ Property SUBJECT = DublinCore.SUBJECT;
+
// Date related properties
/**
* @see DublinCore#DATE
* @see Office#CREATION_DATE
*/
- public static final Property CREATED = Property.composite(DublinCore.CREATED,
+ Property CREATED = Property.composite(DublinCore.CREATED,
new Property[] {
Office.CREATION_DATE,
- MSOffice.CREATION_DATE
});
/**
* @see DublinCore#MODIFIED
- * @see Metadata#DATE
- * @see Office#SAVE_DATE
+ * @see Office#SAVE_DATE
*/
- public static final Property MODIFIED = Property.composite(DublinCore.MODIFIED,
+ Property MODIFIED = Property.composite(DublinCore.MODIFIED,
new Property[] {
- Metadata.DATE,
- Office.SAVE_DATE,
- MSOffice.LAST_SAVED,
- Property.internalText(Metadata.MODIFIED),
+ Office.SAVE_DATE,
Property.internalText("Last-Modified")
});
/** @see Office#PRINT_DATE */
- public static final Property PRINT_DATE = Property.composite(Office.PRINT_DATE,
- new Property[] { MSOffice.LAST_PRINTED });
+ Property PRINT_DATE = Office.PRINT_DATE;
/**
* @see XMP#METADATA_DATE
*/
- public static final Property METADATA_DATE = XMP.METADATA_DATE;
+ Property METADATA_DATE = XMP.METADATA_DATE;
// Geographic related properties
@@ -260,17 +249,17 @@ public interface TikaCoreProperties {
/**
* @see Geographic#LATITUDE
*/
- public static final Property LATITUDE = Geographic.LATITUDE;
+ Property LATITUDE = Geographic.LATITUDE;
/**
* @see Geographic#LONGITUDE
*/
- public static final Property LONGITUDE = Geographic.LONGITUDE;
+ Property LONGITUDE = Geographic.LONGITUDE;
/**
* @see Geographic#ALTITUDE
*/
- public static final Property ALTITUDE = Geographic.ALTITUDE;
+ Property ALTITUDE = Geographic.ALTITUDE;
// Comment and rating properties
@@ -278,56 +267,21 @@ public interface TikaCoreProperties {
/**
* @see XMP#RATING
*/
- public static final Property RATING = XMP.RATING;
+ Property RATING = XMP.RATING;
/**
* @see OfficeOpenXMLExtended#COMMENTS
*/
- public static final Property COMMENTS = Property.composite(OfficeOpenXMLExtended.COMMENTS,
+ Property COMMENTS = Property.composite(OfficeOpenXMLExtended.COMMENTS,
new Property[] {
- Property.internalTextBag(ClimateForcast.COMMENT),
- Property.internalTextBag(MSOffice.COMMENTS)
+ Property.internalTextBag(ClimateForcast.COMMENT)
});
-
- // TODO: Remove transition properties in Tika 2.0
-
- /**
- * @see DublinCore#SUBJECT
- * @deprecated use TikaCoreProperties#KEYWORDS
- */
- @Deprecated
- public static final Property TRANSITION_KEYWORDS_TO_DC_SUBJECT = Property.composite(DublinCore.SUBJECT,
- new Property[] { Property.internalTextBag(MSOffice.KEYWORDS) });
-
- /**
- * @see OfficeOpenXMLExtended#COMMENTS
- * @deprecated use TikaCoreProperties#DESCRIPTION
- */
- @Deprecated
- public static final Property TRANSITION_SUBJECT_TO_DC_DESCRIPTION = Property.composite(DublinCore.DESCRIPTION,
- new Property[] { Property.internalText(Metadata.SUBJECT) });
-
- /**
- * @see DublinCore#TITLE
- * @deprecated use TikaCoreProperties#TITLE
- */
- @Deprecated
- public static final Property TRANSITION_SUBJECT_TO_DC_TITLE = Property.composite(DublinCore.TITLE,
- new Property[] { Property.internalText(Metadata.SUBJECT) });
-
- /**
- * @see OfficeOpenXMLCore#SUBJECT
- * @deprecated use OfficeOpenXMLCore#SUBJECT
- */
- @Deprecated
- public static final Property TRANSITION_SUBJECT_TO_OO_SUBJECT = Property.composite(OfficeOpenXMLCore.SUBJECT,
- new Property[] { Property.internalText(Metadata.SUBJECT) });
/**
* See {@link #EMBEDDED_RESOURCE_TYPE}
*/
- public static final Property EMBEDDED_RESOURCE_TYPE =
- Property.internalClosedChoise(TikaMetadataKeys.EMBEDDED_RESOURCE_TYPE,
+ Property EMBEDDED_RESOURCE_TYPE =
+ Property.internalClosedChoise(EMBEDDED_RESOURCE_TYPE_KEY,
new String[]{EmbeddedResourceType.ATTACHMENT.toString(), EmbeddedResourceType.INLINE.toString()});
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/TikaMetadataKeys.java b/tika-core/src/main/java/org/apache/tika/metadata/TikaMetadataKeys.java
deleted file mode 100644
index ce40a11..0000000
--- a/tika-core/src/main/java/org/apache/tika/metadata/TikaMetadataKeys.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.metadata;
-
-/**
- * Contains keys to properties in Metadata instances.
- */
-public interface TikaMetadataKeys {
-
- String RESOURCE_NAME_KEY = "resourceName";
-
- String PROTECTED = "protected";
-
- String EMBEDDED_RELATIONSHIP_ID = "embeddedRelationshipId";
-
- String EMBEDDED_STORAGE_CLASS_ID = "embeddedStorageClassId";
-
- String EMBEDDED_RESOURCE_TYPE = "embeddedResourceType";
-
-}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/WordPerfect.java b/tika-core/src/main/java/org/apache/tika/metadata/WordPerfect.java
index 12ca174..eac3504 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/WordPerfect.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/WordPerfect.java
@@ -26,41 +26,41 @@ public interface WordPerfect {
*/
Property FILE_SIZE = Property.internalText(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "FileSize");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "FileSize");
/**
* File identifier.
*/
Property FILE_ID = Property.internalText(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "FileId");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "FileId");
/**
* Product type.
*/
Property PRODUCT_TYPE = Property.internalInteger(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "ProductType");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "ProductType");
/**
* File type.
*/
Property FILE_TYPE = Property.internalInteger(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "FileType");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "FileType");
/**
* Major version.
*/
Property MAJOR_VERSION = Property.internalInteger(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "MajorVersion");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "MajorVersion");
/**
* Minor version.
*/
Property MINOR_VERSION = Property.internalInteger(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "MinorVersion");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "MinorVersion");
/**
* Is encrypted?.
*/
Property ENCRYPTED = Property.internalBoolean(
WORDPERFECT_METADATA_NAME_PREFIX
- + Metadata.NAMESPACE_PREFIX_DELIMITER + "Encrypted");
+ + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "Encrypted");
}
diff --git a/tika-core/src/main/java/org/apache/tika/metadata/XMP.java b/tika-core/src/main/java/org/apache/tika/metadata/XMP.java
index 0f8c7fc..43bca6b 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/XMP.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/XMP.java
@@ -23,7 +23,7 @@ public interface XMP {
String PREFIX = "xmp";
/** The xmp prefix followed by the colon delimiter */
- String PREFIX_ = PREFIX + Metadata.NAMESPACE_PREFIX_DELIMITER;
+ String PREFIX_ = PREFIX + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
/**
* The date and time the resource was created. For a digital file, this need not
diff --git a/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java b/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
index 46b9d8f..4acfe01 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
@@ -38,6 +38,7 @@ import org.apache.tika.detect.Detector;
import org.apache.tika.detect.TextDetector;
import org.apache.tika.detect.XmlRootExtractor;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* This class is a MimeType repository. It gathers a set of MimeTypes and
@@ -498,7 +499,7 @@ public final class MimeTypes implements Detector, Serializable {
}
// Get type based on resourceName hint (if available)
- String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (resourceName != null) {
String name = null;
diff --git a/tika-core/src/main/java/org/apache/tika/mime/ProbabilisticMimeDetectionSelector.java b/tika-core/src/main/java/org/apache/tika/mime/ProbabilisticMimeDetectionSelector.java
index f0f3869..3de1fa9 100644
--- a/tika-core/src/main/java/org/apache/tika/mime/ProbabilisticMimeDetectionSelector.java
+++ b/tika-core/src/main/java/org/apache/tika/mime/ProbabilisticMimeDetectionSelector.java
@@ -25,6 +25,7 @@ import java.util.List;
import org.apache.tika.detect.Detector;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* Selector for combining different mime detection results
@@ -141,7 +142,7 @@ public class ProbabilisticMimeDetectionSelector implements Detector {
MimeType extHint = null;
// Get type based on resourceName hint (if available)
- String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String resourceName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (resourceName != null) {
String name = null;
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java b/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
index 0f334e3..111128e 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
@@ -31,6 +31,7 @@ import java.nio.file.Path;
import java.util.concurrent.Executor;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
@@ -91,7 +92,7 @@ public class ParsingReader extends Reader {
private static Metadata getMetadata(String name) {
Metadata metadata = new Metadata();
if (name != null && name.length() > 0) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, name);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
}
return metadata;
}
@@ -162,7 +163,7 @@ public class ParsingReader extends Reader {
ParseContext context) throws IOException {
this(parser, stream, metadata, context, new Executor() {
public void execute(Runnable command) {
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
name = "Apache Tika: " + name;
} else {
diff --git a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
index 739a9b3..3cba1f1 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
@@ -29,7 +29,6 @@ import org.apache.tika.io.FilenameUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.ContentHandlerFactory;
import org.apache.tika.utils.ExceptionUtils;
@@ -246,10 +245,10 @@ public class RecursiveParserWrapper implements Parser {
private String getResourceName(Metadata metadata) {
String objectName = "";
- if (metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY) != null) {
- objectName = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
- } else if (metadata.get(TikaMetadataKeys.EMBEDDED_RELATIONSHIP_ID) != null) {
- objectName = metadata.get(TikaMetadataKeys.EMBEDDED_RELATIONSHIP_ID);
+ if (metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY) != null) {
+ objectName = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
+ } else if (metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID) != null) {
+ objectName = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
} else {
objectName = "embedded-" + (++unknownCount);
}
diff --git a/tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java b/tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java
index 40a92a6..a208fab 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/digest/InputStreamDigester.java
@@ -156,7 +156,7 @@ public class InputStreamDigester implements DigestingParser.Digester {
private String getMetadataKey() {
return TikaCoreProperties.TIKA_META_PREFIX +
- "digest" + Metadata.NAMESPACE_PREFIX_DELIMITER +
+ "digest" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER +
algorithmKeyName;
}
diff --git a/tika-core/src/test/java/org/apache/tika/TikaTest.java b/tika-core/src/test/java/org/apache/tika/TikaTest.java
index 153a564..0eaf675 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaTest.java
@@ -37,6 +37,7 @@ import org.apache.tika.extractor.EmbeddedResourceHandler;
import org.apache.tika.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
@@ -183,7 +184,7 @@ public abstract class TikaTest {
protected XMLResult getXML(String filePath, Parser parser) throws Exception {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filePath);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filePath);
return getXML(filePath, parser, metadata);
}
diff --git a/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java b/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
index c815607..3bbfb59 100644
--- a/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
+++ b/tika-core/src/test/java/org/apache/tika/detect/MimeDetectionWithNNTest.java
@@ -112,7 +112,7 @@ public class MimeDetectionWithNNTest {
expected, mime);
// Add resource name and test again
- // metadata.set(Metadata.RESOURCE_NAME_KEY, urlOrFileName);
+ // metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, urlOrFileName);
mime = this.detector.detect(in, metadata).toString();
assertEquals(urlOrFileName
+ " is not properly detected after adding resource name.",
diff --git a/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java b/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
index 23fbb15..7e4839b 100644
--- a/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
+++ b/tika-core/src/test/java/org/apache/tika/detect/NameDetectorTest.java
@@ -22,6 +22,7 @@ import java.util.Map;
import java.util.regex.Pattern;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import static org.junit.Assert.assertEquals;
@@ -96,7 +97,7 @@ public class NameDetectorTest {
private void assertDetect(MediaType type, String name){
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, name);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
try {
assertEquals(type, detector.detect(null, metadata));
} catch (IOException e) {
diff --git a/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java b/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
index 229d310..77c1ed9 100644
--- a/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
+++ b/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
@@ -29,6 +29,7 @@ import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.junit.Test;
public class TikaInputStreamTest {
@@ -101,7 +102,7 @@ public class TikaInputStreamTest {
URL url = TikaInputStreamTest.class.getResource("test.txt");
Metadata metadata = new Metadata();
TikaInputStream.get(url, metadata).close();
- assertEquals("test.txt", metadata.get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("test.txt", metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals(
Long.toString(Files.size(Paths.get(url.toURI()))),
metadata.get(Metadata.CONTENT_LENGTH));
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java b/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
index 490e3e8..78db151 100644
--- a/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
+++ b/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
@@ -370,7 +370,7 @@ public class TestMetadata {
Metadata meta = new Metadata();
Property compositeProperty = Property.composite(
DublinCore.DESCRIPTION, new Property[] {
- Property.internalText(Metadata.DESCRIPTION),
+ TikaCoreProperties.DESCRIPTION,
Property.internalText("testDescriptionAlt")
});
String message = "composite description";
@@ -381,7 +381,6 @@ public class TestMetadata {
// Fetch as the primary property on the composite
assertEquals(message, meta.get(DublinCore.DESCRIPTION));
// Fetch as the aliases
- assertEquals(message, meta.get(Metadata.DESCRIPTION));
assertEquals(message, meta.get("testDescriptionAlt"));
}
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 1f986da..3e1d52b 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -29,6 +29,7 @@ import java.net.URL;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.junit.Before;
import org.junit.Test;
@@ -156,7 +157,7 @@ public class MimeDetectionTest {
assertEquals(urlOrFileName + " is not properly detected: detected.", expected, mime);
//Add resource name and test again
- metadata.set(Metadata.RESOURCE_NAME_KEY, urlOrFileName);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, urlOrFileName);
mime = this.mimeTypes.detect(in, metadata).toString();
assertEquals(urlOrFileName + " is not properly detected after adding resource name.", expected, mime);
} finally {
@@ -180,7 +181,7 @@ public class MimeDetectionTest {
new ByteArrayInputStream(new byte[0]), new Metadata()));
Metadata namehint = new Metadata();
- namehint.set(Metadata.RESOURCE_NAME_KEY, "test.txt");
+ namehint.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt");
assertEquals(MediaType.TEXT_PLAIN, mimeTypes.detect(
new ByteArrayInputStream(new byte[0]), namehint));
@@ -231,16 +232,16 @@ public class MimeDetectionTest {
// With a filename, picks the right one
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test.hello.world");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.hello.world");
assertEquals(helloType, mimeTypes.detect(new ByteArrayInputStream(helloWorld), metadata));
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test.x-hello-world");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.x-hello-world");
assertEquals(helloXType, mimeTypes.detect(new ByteArrayInputStream(helloWorld), metadata));
// Without, goes for the one that sorts last
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "testingTESTINGtesting");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testingTESTINGtesting");
assertEquals(helloXType, mimeTypes.detect(new ByteArrayInputStream(helloWorld), metadata));
}
}
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
index 8782167..b94b095 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
@@ -30,6 +30,7 @@ import java.util.Set;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.junit.Before;
import org.junit.Test;
@@ -216,11 +217,11 @@ public class MimeTypesReaderTest {
// Check that we can correct detect with the file one:
// By name
Metadata m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "test.hello.world");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "test.hello.world");
assertEquals(hwf.toString(), this.mimeTypes.detect(null, m).toString());
m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "test.x-hello-world");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "test.x-hello-world");
assertEquals(hxw.toString(), this.mimeTypes.detect(null, m).toString());
// By contents - picks the x one as that sorts later
@@ -245,7 +246,7 @@ public class MimeTypesReaderTest {
"src/test/resources/org/apache/tika/mime/external-mimetypes.xml");
MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
Metadata m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "test.external.mime.type");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "test.external.mime.type");
assertEquals("external/mime-type", mimeTypes.detect(null, m).toString());
}
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
index ecd3330..e285449 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
@@ -29,6 +29,7 @@ import java.net.URL;
import java.nio.charset.StandardCharsets;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.junit.Before;
import org.junit.Test;
@@ -154,7 +155,7 @@ public class ProbabilisticMimeDetectionTest {
expected, mime);
// Add resource name and test again
- metadata.set(Metadata.RESOURCE_NAME_KEY, urlOrFileName);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, urlOrFileName);
mime = this.proDetector.detect(in, metadata).toString();
assertEquals(urlOrFileName
+ " is not properly detected after adding resource name.",
@@ -181,7 +182,7 @@ public class ProbabilisticMimeDetectionTest {
new ByteArrayInputStream(new byte[0]), new Metadata()));
Metadata namehint = new Metadata();
- namehint.set(Metadata.RESOURCE_NAME_KEY, "test.txt");
+ namehint.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt");
assertEquals(MediaType.TEXT_PLAIN, proDetector.detect(
new ByteArrayInputStream(new byte[0]), namehint));
@@ -232,18 +233,18 @@ public class ProbabilisticMimeDetectionTest {
// With a filename, picks the right one
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test.hello.world");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.hello.world");
assertEquals(helloType, proDetector.detect(
new ByteArrayInputStream(helloWorld), metadata));
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test.x-hello-world");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.x-hello-world");
assertEquals(helloXType, proDetector.detect(
new ByteArrayInputStream(helloWorld), metadata));
// Without, goes for the one that sorts last
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "testingTESTINGtesting");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testingTESTINGtesting");
assertEquals(helloXType, proDetector.detect(
new ByteArrayInputStream(helloWorld), metadata));
}
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
index 5605300..871f975 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
@@ -32,6 +32,7 @@ import org.apache.tika.Tika;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.detect.DefaultProbDetector;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.ProbabilisticMimeDetectionSelector.Builder;
import org.junit.Before;
import org.junit.Test;
@@ -175,7 +176,7 @@ public class ProbabilisticMimeDetectionTestWithTika {
expected, mime);
// Add resource name and test again
- metadata.set(Metadata.RESOURCE_NAME_KEY, urlOrFileName);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, urlOrFileName);
// mime = this.proDetector.detect(in, metadata).toString();
mime = tika.detect(in, metadata).toString();
assertEquals(urlOrFileName
@@ -198,7 +199,7 @@ public class ProbabilisticMimeDetectionTestWithTika {
new ByteArrayInputStream(new byte[0]), new Metadata()));
Metadata namehint = new Metadata();
- namehint.set(Metadata.RESOURCE_NAME_KEY, "test.txt");
+ namehint.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.txt");
assertEquals(MediaType.TEXT_PLAIN.toString(),
tika.detect(new ByteArrayInputStream(new byte[0]), namehint));
@@ -249,18 +250,18 @@ public class ProbabilisticMimeDetectionTestWithTika {
// With a filename, picks the right one
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test.hello.world");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.hello.world");
assertEquals(helloType.toString(),
tika.detect(new ByteArrayInputStream(helloWorld), metadata));
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test.x-hello-world");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test.x-hello-world");
assertEquals(helloXType.toString(),
tika.detect(new ByteArrayInputStream(helloWorld), metadata));
// Without, goes for the one that sorts last
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "testingTESTINGtesting");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "testingTESTINGtesting");
assertEquals(helloXType.toString(),
tika.detect(new ByteArrayInputStream(helloWorld), metadata));
}
diff --git a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
index 510ca69..346850f 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
@@ -36,7 +36,7 @@ import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.io.IOExceptionWithCause;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -149,7 +149,7 @@ public class MockParser extends AbstractParser {
String embeddedText = action.getTextContent();
EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor(context);
Metadata m = new Metadata();
- m.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
+ m.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileName);
if (! "".equals(contentType)) {
m.set(Metadata.CONTENT_TYPE, contentType);
}
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java b/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
index fd1c382..f142c5b 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/ExtractComparer.java
@@ -89,7 +89,7 @@ public class ExtractComparer extends AbstractProfiler {
}
private static final String DIGEST_KEY_PREFIX = TikaCoreProperties.TIKA_META_PREFIX+
- "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ "digest"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
public static void USAGE() {
HelpFormatter helpFormatter = new HelpFormatter();
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/io/ExtractReader.java b/tika-eval/src/main/java/org/apache/tika/eval/io/ExtractReader.java
index 2a46b84..7bafa97 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/io/ExtractReader.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/io/ExtractReader.java
@@ -20,6 +20,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.RecursiveParserWrapper;
@@ -182,7 +183,7 @@ public class ExtractReader {
//Let's hope the file name has a suffix that can
//be used to determine the mime. Could be wrong or missing,
//but better than nothing.
- m.set(Metadata.RESOURCE_NAME_KEY, fileSuffixes.originalFileName);
+ m.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileSuffixes.originalFileName);
MediaType mimeType = tikaConfig.getMimeRepository().detect(null, m);
if (mimeType != null) {
diff --git a/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java b/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
index fe35bcb..773b9df 100644
--- a/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
+++ b/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
@@ -29,6 +29,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.ParsingEmbeddedDocumentExtractor;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.AutoDetectParser;
@@ -74,7 +75,7 @@ public class ExtractEmbeddedFiles {
throws SAXException, IOException {
//try to get the name of the embedded file from the metadata
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name == null) {
name = "file_" + fileCount++;
diff --git a/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java b/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
index 514f05c..dcfbc0f 100755
--- a/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
+++ b/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
@@ -32,6 +32,7 @@ import org.apache.jackrabbit.server.io.ImportContext;
import org.apache.jackrabbit.webdav.io.InputContext;
import org.apache.tika.detect.Detector;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -85,7 +86,7 @@ public class ImportContextImpl implements ImportContext {
metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
}
if (systemId != null) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, systemId);
}
if (stream != null && !stream.markSupported()) {
stream = new BufferedInputStream(stream);
diff --git a/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java b/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
index 5c6a9d4..23a4d8c 100755
--- a/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
+++ b/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
@@ -31,6 +31,7 @@ import org.apache.tika.Tika;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* Builds on the LuceneIndexer from Chapter 5 and adds indexing of Metadata.
@@ -63,15 +64,15 @@ public class MetadataAwareLuceneIndexer {
public void indexWithDublinCore(File file) throws Exception {
Metadata met = new Metadata();
- met.add(Metadata.CREATOR, "Manning");
- met.add(Metadata.CREATOR, "Tika in Action");
- met.set(Metadata.DATE, new Date());
- met.set(Metadata.FORMAT, tika.detect(file));
+ met.add(TikaCoreProperties.CREATOR, "Manning");
+ met.add(TikaCoreProperties.CREATOR, "Tika in Action");
+ met.set(TikaCoreProperties.CREATED, new Date());
+ met.set(TikaCoreProperties.FORMAT, tika.detect(file));
met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
- met.add(Metadata.SUBJECT, "File");
- met.add(Metadata.SUBJECT, "Indexing");
- met.add(Metadata.SUBJECT, "Metadata");
- met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public",
+ met.add(TikaCoreProperties.SUBJECT, "File");
+ met.add(TikaCoreProperties.SUBJECT, "Indexing");
+ met.add(TikaCoreProperties.SUBJECT, "Metadata");
+ met.set(Property.externalClosedChoise(TikaCoreProperties.RIGHTS.getName(), "public",
"private"), "public");
try (InputStream is = new FileInputStream(file)) {
tika.parse(is, met);
diff --git a/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java b/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
index fe0c8d9..e6fef04 100755
--- a/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
+++ b/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
@@ -30,6 +30,7 @@ import org.apache.tika.langdetect.OptimaizeLangDetector;
import org.apache.tika.language.detect.LanguageDetector;
import org.apache.tika.language.detect.LanguageResult;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.AutoDetectParser;
@@ -84,7 +85,7 @@ public class MyFirstTika {
System.out.println("Examining: [" + filename + "]");
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
System.out.println("The MIME type (based on filename) is: ["
+ mimeRegistry.detect(null, metadata) + "]");
diff --git a/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java b/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
index 5e3201f..a5d2c3b 100644
--- a/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
+++ b/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
@@ -30,6 +30,7 @@ import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
@@ -165,7 +166,7 @@ public class ParsingExample {
RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p, factory);
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
ParseContext context = new ParseContext();
try (InputStream stream = ParsingExample.class.getResourceAsStream("test_recursive_embedded.docx")) {
diff --git a/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java b/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
index d6a259b..88e8a4c 100755
--- a/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
+++ b/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
@@ -36,6 +36,7 @@ import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6
@@ -62,7 +63,7 @@ public class RecentFiles {
String nowDateTime = ISO8601.format(gc);
gc.add(java.util.GregorianCalendar.MINUTE, -5);
String fiveMinsAgo = ISO8601.format(gc);
- TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(),
+ TermRangeQuery query = new TermRangeQuery(TikaCoreProperties.CREATED.getName(),
fiveMinsAgo, nowDateTime, true, true);
TopScoreDocCollector collector = TopScoreDocCollector.create(20,
true);
@@ -87,16 +88,16 @@ public class RecentFiles {
output.append("<item>");
output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()),
"isPermalink", "true"));
- output.append(emitTag("title", doc.get(Metadata.TITLE), null, null));
+ output.append(emitTag("title", doc.get(TikaCoreProperties.TITLE.getName()), null, null));
output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()),
null, null));
- output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null));
- for (String topic : doc.getValues(Metadata.SUBJECT)) {
+ output.append(emitTag("author", doc.get(TikaCoreProperties.CREATOR.getName()), null, null));
+ for (String topic : doc.getValues(TikaCoreProperties.SUBJECT.getName())) {
output.append(emitTag("category", topic, null, null));
}
output.append(emitTag("pubDate", rssDateFormat.format(ISO8601.parse(doc
- .get(Metadata.DATE.toString()))), null, null));
- output.append(emitTag("description", doc.get(Metadata.TITLE), null,
+ .get(TikaCoreProperties.CREATED.getName()))), null, null));
+ output.append(emitTag("description", doc.get(TikaCoreProperties.TITLE.getName()), null,
null));
output.append("</item>");
return output.toString();
diff --git a/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java b/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
index d263822..23a33e4 100755
--- a/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
+++ b/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
@@ -26,6 +26,7 @@ import java.util.Date;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
/**
* Generates document summaries for corpus analysis in the Open Relevance
@@ -39,8 +40,8 @@ public class TrecDocumentGenerator {
Metadata met = new Metadata();
String contents = tika.parseToString(new FileInputStream(file), met);
- return new TrecDocument(met.get(Metadata.RESOURCE_NAME_KEY), contents,
- met.getDate(Metadata.DATE));
+ return new TrecDocument(met.get(TikaCoreProperties.RESOURCE_NAME_KEY), contents,
+ met.getDate(TikaCoreProperties.CREATED));
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java b/tika-parsers/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
index c8ea317..45597da 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/asm/XHTMLClassVisitor.java
@@ -81,7 +81,7 @@ class XHTMLClassVisitor extends ClassVisitor {
}
metadata.set(TikaCoreProperties.TITLE, className);
- metadata.set(Metadata.RESOURCE_NAME_KEY, className + ".class");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, className + ".class");
try {
xhtml.startDocument();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
index bfeb89f..162b1be 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/code/SourceCodeParser.java
@@ -93,7 +93,7 @@ public class SourceCodeParser extends AbstractEncodingDetectorParser {
new CloseShieldInputStream(stream), metadata, getEncodingDetector(context))) {
Charset charset = reader.getCharset();
String mediaType = metadata.get(Metadata.CONTENT_TYPE);
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (mediaType != null && name != null) {
MediaType type = MediaType.parse(mediaType);
metadata.set(Metadata.CONTENT_TYPE, type.toString());
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
index 2cfb53d..c54baf4 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
@@ -55,9 +55,9 @@ public class DWGParser extends AbstractParser {
/** The order of the fields in the header */
private static final Property[] HEADER_PROPERTIES_ENTRIES = {
TikaCoreProperties.TITLE,
- TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION,
+ TikaCoreProperties.DESCRIPTION,
TikaCoreProperties.CREATOR,
- TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT,
+ TikaCoreProperties.SUBJECT,
TikaCoreProperties.COMMENTS,
TikaCoreProperties.MODIFIER,
null, // Unknown?
@@ -69,11 +69,11 @@ public class DWGParser extends AbstractParser {
null,
TikaCoreProperties.RELATION, // 0x01
TikaCoreProperties.TITLE, // 0x02
- TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION, // 0x03
+ TikaCoreProperties.DESCRIPTION, // 0x03
TikaCoreProperties.CREATOR, // 0x04
null,
TikaCoreProperties.COMMENTS,// 0x06
- TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, // 0x07
+ TikaCoreProperties.SUBJECT, // 0x07
TikaCoreProperties.MODIFIER, // 0x08
};
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/executable/ExecutableParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/executable/ExecutableParser.java
index b962e42..f7f2552 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/executable/ExecutableParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/executable/ExecutableParser.java
@@ -29,6 +29,7 @@ import org.apache.poi.util.LittleEndian;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.EndianUtils;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -127,7 +128,7 @@ public class ExecutableParser extends AbstractParser implements MachineMetadata
// Turn this into helpful metadata
Date createdAtD = new Date(createdAt*1000l);
- metadata.set(Metadata.CREATION_DATE, createdAtD);
+ metadata.set(TikaCoreProperties.CREATED, createdAtD);
switch(machine) {
case 0x14c:
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
index 000ff10..ea6f4ee 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/font/AdobeFontMetricParser.java
@@ -146,7 +146,7 @@ public class AdobeFontMetricParser extends AbstractParser {
// If appropriate date then store as metadata
if( date != null ) {
- addMetadataByProperty( metadata, Metadata.CREATION_DATE, date );
+ addMetadataByProperty( metadata, TikaCoreProperties.CREATED, date );
}
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
index 821493b..d534b19 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
@@ -27,6 +27,7 @@ import java.util.Set;
import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -80,7 +81,7 @@ public class HDFParser extends AbstractParser {
ByteArrayOutputStream os = new ByteArrayOutputStream();
IOUtils.copy(stream, os);
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name == null) {
name = "";
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java
index 1febbf4..0910097 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlHandler.java
@@ -77,7 +77,7 @@ class HtmlHandler extends TextContentHandler {
this.extractScripts = extractScripts;
// Try to determine the default base URL, if one has not been given
if (metadata.get(Metadata.CONTENT_LOCATION) == null) {
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
name = name.trim();
try {
@@ -304,7 +304,7 @@ class HtmlHandler extends TextContentHandler {
}
//do anything with attrs?
Metadata m = new Metadata();
- m.set(Metadata.EMBEDDED_RESOURCE_TYPE,
+ m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
String src = scriptAtts.getValue("src");
if (src != null) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
index d2ee0c6..e17c41c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageMetadataExtractor.java
@@ -57,7 +57,6 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TIFF;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.image.xmp.JempboxExtractor;
import org.apache.tika.utils.XMLReaderUtils;
import org.w3c.dom.Document;
@@ -542,7 +541,7 @@ public class ImageMetadataExtractor {
if (directory.containsTag(IptcDirectory.TAG_KEYWORDS)) {
String[] keywords = directory.getStringArray(IptcDirectory.TAG_KEYWORDS);
for (String k : keywords) {
- metadata.add(TikaCoreProperties.KEYWORDS, k);
+ metadata.add(TikaCoreProperties.SUBJECT, k);
}
}
if (directory.containsTag(IptcDirectory.TAG_HEADLINE)) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
index e847615..dff64dc 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/JempboxExtractor.java
@@ -35,7 +35,6 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPMM;
-import org.apache.tika.parser.ParseContext;
import org.apache.tika.utils.DateUtils;
import org.apache.tika.utils.XMLReaderUtils;
import org.w3c.dom.Document;
@@ -108,9 +107,9 @@ public class JempboxExtractor {
}
if (dc.getSubjects() != null && dc.getSubjects().size() > 0) {
for (String keyword : dc.getSubjects()) {
- metadata.add(TikaCoreProperties.KEYWORDS, keyword);
+ metadata.add(TikaCoreProperties.SUBJECT, keyword);
}
- // TODO should we set KEYWORDS too?
+ // TODO should we set SUBJECT too?
// All tested photo managers set the same in Iptc.Application2.Keywords and Xmp.dc.subject
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
index ec436e0..3904244 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/iptc/IptcAnpaParser.java
@@ -774,7 +774,7 @@ public class IptcAnpaParser implements Parser {
// in other consuming applications, like Lucene
metadata.set(Metadata.CONTENT_TYPE, clean("text/anpa-1312"));
metadata.set(TikaCoreProperties.TITLE, clean(properties.get("title")));
- metadata.set(TikaCoreProperties.KEYWORDS, clean(properties.get("subject")));
+ metadata.set(TikaCoreProperties.SUBJECT, clean(properties.get("subject")));
metadata.set(TikaCoreProperties.CREATOR, clean(properties.get("author")));
metadata.set(TikaCoreProperties.CREATED, clean(properties.get("created")));
metadata.set(TikaCoreProperties.MODIFIED, clean(properties.get("modified")));
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
index c60f955..4509cd2 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/iwork/KeynoteContentHandler.java
@@ -17,6 +17,7 @@
package org.apache.tika.parser.iwork;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
@@ -54,7 +55,7 @@ class KeynoteContentHandler extends DefaultHandler {
@Override
public void endDocument() throws SAXException {
- metadata.set(Metadata.SLIDE_COUNT, String.valueOf(numberOfSlides));
+ metadata.set(Office.SLIDE_COUNT, String.valueOf(numberOfSlides));
}
@Override
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java
index 0d3dfd1..2ee64be 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/iwork/NumbersContentHandler.java
@@ -17,6 +17,7 @@
package org.apache.tika.parser.iwork;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.XHTMLContentHandler;
@@ -56,7 +57,7 @@ class NumbersContentHandler extends DefaultHandler {
@Override
public void endDocument() throws SAXException {
- metadata.set(Metadata.PAGE_COUNT, String.valueOf(numberOfSheets));
+ metadata.set(Office.PAGE_COUNT, String.valueOf(numberOfSheets));
}
@Override
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
index 9b45769..30d582c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/iwork/PagesContentHandler.java
@@ -16,7 +16,9 @@
*/
package org.apache.tika.parser.iwork;
+import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.XHTMLContentHandler;
@@ -71,7 +73,7 @@ class PagesContentHandler extends DefaultHandler {
@Override
public void endDocument() throws SAXException {
- metadata.set(Metadata.PAGE_COUNT, String.valueOf(pageCount));
+ metadata.set(Office.PAGE_COUNT, String.valueOf(pageCount));
if (pageCount > 0) {
doFooter();
xhtml.endElement("div");
@@ -286,7 +288,7 @@ class PagesContentHandler extends DefaultHandler {
} else if ("sl:SLCreationDateProperty".equals(metaDataQName)) {
metaDataKey = TikaCoreProperties.CREATED;
} else if ("sl:SLLastModifiedDateProperty".equals(metaDataQName)) {
- metaDataKey = Metadata.LAST_MODIFIED;
+ metaDataKey = TikaCoreProperties.MODIFIED;
} else if ("sl:language".equals(metaDataQName)) {
metaDataKey = TikaCoreProperties.LANGUAGE;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java b/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
index f6691e0..af8d1d8 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/jdbc/JDBCTableReader.java
@@ -41,7 +41,7 @@ import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Database;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
@@ -190,7 +190,7 @@ class JDBCTableReader {
m.set(Database.PREFIX + "IS_CLOB_TRUNCATED", Boolean.toString(truncated));
m.set(Metadata.CONTENT_TYPE, "text/plain; charset=UTF-8");
m.set(Metadata.CONTENT_LENGTH, Integer.toString(readSize));
- m.set(TikaMetadataKeys.RESOURCE_NAME_KEY,
+ m.set(TikaCoreProperties.RESOURCE_NAME_KEY,
//just in case something screwy is going on with the column name
FilenameUtils.normalize(FilenameUtils.getName(columnName + "_" + rowNum + ".txt")));
@@ -224,7 +224,7 @@ class JDBCTableReader {
handler.startElement("", "span", "span", attrs);
String extension = embeddedDocumentUtil.getExtension(is, m);
- m.set(TikaMetadataKeys.RESOURCE_NAME_KEY,
+ m.set(TikaCoreProperties.RESOURCE_NAME_KEY,
//just in case something screwy is going on with the column name
FilenameUtils.normalize(FilenameUtils.getName(columnName + "_" + rowNum + extension)));
if (embeddedDocumentUtil.shouldParseEmbedded(m)) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
index 40db8f3..ee3d2e4 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mail/MailContentHandler.java
@@ -198,7 +198,7 @@ class MailContentHandler implements ContentHandler {
String contentDispositionFileName = maximalBody.getContentDispositionFilename();
if ( contentDispositionFileName != null ) {
- submd.set( Metadata.RESOURCE_NAME_KEY, contentDispositionFileName );
+ submd.set( TikaCoreProperties.RESOURCE_NAME_KEY, contentDispositionFileName );
}
submd.set( Metadata.CONTENT_DISPOSITION, contentDisposition.toString() );
@@ -354,7 +354,9 @@ class MailContentHandler implements ContentHandler {
metadata.add(TikaCoreProperties.CREATOR, from);
}
} else if (fieldname.equalsIgnoreCase("Subject")) {
- metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE,
+ metadata.set(TikaCoreProperties.TITLE,
+ ((UnstructuredField) parsedField).getValue());
+ metadata.set(TikaCoreProperties.SUBJECT,
((UnstructuredField) parsedField).getValue());
} else if (fieldname.equalsIgnoreCase("To")) {
processAddressList(parsedField, "To:", Metadata.MESSAGE_TO);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
index 4aa1d67..758a7b0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
@@ -190,7 +190,7 @@ public class MboxParser extends AbstractParser {
}
metadata.add(property, headerContent);
} else if (headerTag.equalsIgnoreCase("Subject")) {
- metadata.add(Metadata.SUBJECT, headerContent);
+ metadata.add(TikaCoreProperties.SUBJECT, headerContent);
} else if (headerTag.equalsIgnoreCase("Date")) {
try {
Date date = parseDate(headerContent);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
index 61d7bac..05b67e5 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mbox/OutlookPSTParser.java
@@ -152,8 +152,8 @@ public class OutlookPSTParser extends AbstractParser {
private void parserMailItem(XHTMLContentHandler handler, PSTMessage pstMail, Metadata mailMetadata,
EmbeddedDocumentExtractor embeddedExtractor) throws SAXException, IOException {
- mailMetadata.set(Metadata.RESOURCE_NAME_KEY, pstMail.getInternetMessageId());
- mailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, pstMail.getInternetMessageId());
+ mailMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, pstMail.getInternetMessageId());
+ mailMetadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, pstMail.getInternetMessageId());
mailMetadata.set(TikaCoreProperties.IDENTIFIER, pstMail.getInternetMessageId());
mailMetadata.set(TikaCoreProperties.TITLE, pstMail.getSubject());
mailMetadata.set(Metadata.MESSAGE_FROM, pstMail.getSenderName());
@@ -241,8 +241,8 @@ public class OutlookPSTParser extends AbstractParser {
xhtml.element("p", filename);
Metadata attachMeta = new Metadata();
- attachMeta.set(Metadata.RESOURCE_NAME_KEY, filename);
- attachMeta.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filename);
+ attachMeta.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
+ attachMeta.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, filename);
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute("", "class", "class", "CDATA", "embedded");
attributes.addAttribute("", "id", "id", "CDATA", filename);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
index 35e9ff6..5c7d445 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
@@ -108,13 +108,13 @@ abstract class AbstractPOIFSExtractor {
Metadata metadata = new Metadata();
if (filename != null) {
metadata.set(Metadata.TIKA_MIME_FILE, filename);
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
}
if (relationshipID != null) {
- metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, relationshipID);
+ metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, relationshipID);
}
if (storageClassID != null) {
- metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, storageClassID.toString());
+ metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, storageClassID.toString());
}
if (mediaType != null) {
metadata.set(Metadata.CONTENT_TYPE, mediaType);
@@ -170,9 +170,9 @@ abstract class AbstractPOIFSExtractor {
// What kind of document is it?
Metadata metadata = new Metadata();
- metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, dir.getName());
+ metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, dir.getName());
if (dir.getStorageClsid() != null) {
- metadata.set(Metadata.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
+ metadata.set(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID, dir.getStorageClsid().toString());
}
POIFSDocumentType type = POIFSDocumentType.detectType(dir);
TikaInputStream embedded = null;
@@ -183,7 +183,7 @@ abstract class AbstractPOIFSExtractor {
// Try to un-wrap the OLE10Native record:
Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode) dir);
if (ole.getLabel() != null) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '/' + ole.getLabel());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, rName + '/' + ole.getLabel());
}
if (ole.getCommand() != null) {
metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getCommand());
@@ -229,14 +229,14 @@ abstract class AbstractPOIFSExtractor {
// Record what we can do about it
metadata.set(Metadata.CONTENT_TYPE, mediaType.getType().toString());
- metadata.set(Metadata.RESOURCE_NAME_KEY, rName + extension);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, rName + extension);
} catch (Exception e) {
EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
return;
}
} else {
metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
- metadata.set(Metadata.RESOURCE_NAME_KEY, rName + '.' + type.getExtension());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, rName + '.' + type.getExtension());
}
// Should we parse it?
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
index f379491..ef8aedf 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessParser.java
@@ -33,6 +33,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -50,9 +51,9 @@ import org.xml.sax.SAXException;
*/
public class JackcessParser extends AbstractParser {
- public static final String SUMMARY_PROPERTY_PREFIX = "MDB_SUMMARY_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
- public static String MDB_PROPERTY_PREFIX = "MDB_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
- public static String USER_DEFINED_PROPERTY_PREFIX = "MDB_USER_PROP" + Metadata.NAMESPACE_PREFIX_DELIMITER;
+ public static final String SUMMARY_PROPERTY_PREFIX = "MDB_SUMMARY_PROP" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+ public static String MDB_PROPERTY_PREFIX = "MDB_PROP" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
+ public static String USER_DEFINED_PROPERTY_PREFIX = "MDB_USER_PROP" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
public static Property MDB_PW = Property.externalText("Password");
private final static LinkResolver IGNORE_LINK_RESOLVER = new IgnoreLinkResolver();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index 0375156..fab72cc 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -327,7 +327,7 @@ public class OfficeParser extends AbstractOfficeParser {
}
for (Map.Entry<String, String> e : macros.entrySet()) {
Metadata m = new Metadata();
- m.set(Metadata.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
+ m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
m.set(Metadata.CONTENT_TYPE, "text/x-vbasic");
if (embeddedDocumentExtractor.shouldParseEmbedded(m)) {
embeddedDocumentExtractor.parseEmbedded(
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 649de67..e8ce7f0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -166,7 +166,7 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
metadata.set(TikaCoreProperties.TITLE, subject);
// TODO: Move to description in Tika 2.0
- metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_DESCRIPTION,
+ metadata.set(TikaCoreProperties.DESCRIPTION,
msg.getConversationTopic());
try {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
index 3e2ea26..46d5591 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/SummaryExtractor.java
@@ -34,7 +34,6 @@ import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
@@ -104,9 +103,10 @@ public class SummaryExtractor {
private void parse(SummaryInformation summary) {
set(TikaCoreProperties.TITLE, summary.getTitle());
addMulti(metadata, TikaCoreProperties.CREATOR, summary.getAuthor());
- set(TikaCoreProperties.KEYWORDS, summary.getKeywords());
- // TODO Move to OO subject in Tika 2.0
- set(TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, summary.getSubject());
+ //make sure these are retrievable specifically
+ add(Office.KEYWORDS, summary.getKeywords());
+ add(OfficeOpenXMLCore.SUBJECT, summary.getSubject());
+
set(TikaCoreProperties.MODIFIER, summary.getLastAuthor());
set(TikaCoreProperties.COMMENTS, summary.getComments());
set(OfficeOpenXMLExtended.TEMPLATE, summary.getTemplate());
@@ -115,7 +115,7 @@ public class SummaryExtractor {
set(TikaCoreProperties.CREATED, summary.getCreateDateTime());
set(TikaCoreProperties.MODIFIED, summary.getLastSaveDateTime());
set(TikaCoreProperties.PRINT_DATE, summary.getLastPrinted());
- set(Metadata.EDIT_TIME, summary.getEditTime());
+ set(OfficeOpenXMLExtended.TOTAL_TIME, Long.toString(summary.getEditTime()));
set(OfficeOpenXMLExtended.DOC_SECURITY, summary.getSecurity());
// New style counts
@@ -125,16 +125,6 @@ public class SummaryExtractor {
if (summary.getPageCount() > 0) {
metadata.set(PagedText.N_PAGES, summary.getPageCount());
}
-
- // Old style, Tika 1.0 properties
- // TODO Remove these in Tika 2.0
- set(Metadata.TEMPLATE, summary.getTemplate());
- set(Metadata.APPLICATION_NAME, summary.getApplicationName());
- set(Metadata.REVISION_NUMBER, summary.getRevNumber());
- set(Metadata.SECURITY, summary.getSecurity());
- set(MSOffice.WORD_COUNT, summary.getWordCount());
- set(MSOffice.CHARACTER_COUNT, summary.getCharCount());
- set(MSOffice.PAGE_COUNT, summary.getPageCount());
}
private void parse(DocumentSummaryInformation summary) {
@@ -148,12 +138,6 @@ public class SummaryExtractor {
if (summary.getSlideCount() > 0) {
metadata.set(PagedText.N_PAGES, summary.getSlideCount());
}
- // Old style, Tika 1.0 counts
- // TODO Remove these in Tika 2.0
- set(Metadata.COMPANY, summary.getCompany());
- set(Metadata.MANAGER, summary.getManager());
- set(MSOffice.SLIDE_COUNT, summary.getSlideCount());
- set(Metadata.CATEGORY, summary.getCategory());
parse(summary.getCustomProperties());
}
@@ -178,7 +162,7 @@ public class SummaryExtractor {
if (customProperties != null) {
for (String name : customProperties.nameSet()) {
// Apply the custom prefix
- String key = Metadata.USER_DEFINED_METADATA_NAME_PREFIX + name;
+ String key = Office.USER_DEFINED_METADATA_NAME_PREFIX + name;
// Get, convert and save property value
Object value = customProperties.get(name);
@@ -216,6 +200,12 @@ public class SummaryExtractor {
}
}
+ private void add(Property property, String value) {
+ if (value != null) {
+ metadata.add(property, value);
+ }
+ }
+
private void set(Property property, Date value) {
if (value != null) {
metadata.set(property, value);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java
index 484f0c5..ed13958 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/TNEFParser.java
@@ -77,8 +77,8 @@ public class TNEFParser extends AbstractParser {
// Set the message subject if known
String subject = msg.getSubject();
if (subject != null && subject.length() > 0) {
- // TODO: Move to title in Tika 2.0
- metadata.set(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, subject);
+ metadata.set(TikaCoreProperties.TITLE, subject);
+ metadata.set(TikaCoreProperties.SUBJECT, subject);
}
// Recurse into the message body RTF
@@ -116,7 +116,7 @@ public class TNEFParser extends AbstractParser {
throws IOException, SAXException, TikaException {
Metadata metadata = new Metadata();
if (name != null)
- metadata.set(Metadata.RESOURCE_NAME_KEY, name);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
if (type != null)
metadata.set(Metadata.CONTENT_TYPE, type);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
index 2560db0..610ddd9 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
@@ -164,7 +164,7 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
InputStream tStream = tPart.getInputStream();
Metadata thumbnailMetadata = new Metadata();
String thumbName = tPart.getPartName().getName();
- thumbnailMetadata.set(Metadata.RESOURCE_NAME_KEY, thumbName);
+ thumbnailMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, thumbName);
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(XHTML, "class", "class", "CDATA", "embedded");
@@ -172,7 +172,7 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
handler.startElement(XHTML, "div", "div", attributes);
handler.endElement(XHTML, "div", "div");
- thumbnailMetadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, thumbName);
+ thumbnailMetadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, thumbName);
thumbnailMetadata.set(Metadata.CONTENT_TYPE, tPart.getContentType());
thumbnailMetadata.set(TikaCoreProperties.TITLE, tPart.getPartName().getName());
@@ -294,7 +294,7 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
TikaInputStream stream = null;
try {
Metadata metadata = new Metadata();
- metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, rel);
+ metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, rel);
DirectoryNode root = fs.getRoot();
POIFSDocumentType type = POIFSDocumentType.detectType(root);
@@ -318,7 +318,7 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
Ole10Native ole =
Ole10Native.createFromEmbeddedOleObject(fs);
if (ole.getLabel() != null) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, ole.getLabel());
}
if (ole.getCommand() != null) {
metadata.add(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, ole.getCommand());
@@ -362,12 +362,12 @@ public abstract class AbstractOOXMLExtractor implements OOXMLExtractor {
protected void handleEmbeddedFile(PackagePart part, ContentHandler handler, String rel)
throws SAXException, IOException {
Metadata metadata = new Metadata();
- metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, rel);
+ metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, rel);
// Get the name
String name = part.getPartName().getName();
metadata.set(
- Metadata.RESOURCE_NAME_KEY,
+ TikaCoreProperties.RESOURCE_NAME_KEY,
name.substring(name.lastIndexOf('/') + 1));
// Get the content type
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
index dbbb839..30f2975 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/MetadataExtractor.java
@@ -27,7 +27,6 @@ import org.apache.poi.openxml4j.opc.internal.PackagePropertiesPart;
import org.apache.poi.openxml4j.util.Nullable;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
@@ -75,45 +74,35 @@ public class MetadataExtractor {
PackagePropertiesPart propsHolder = properties
.getUnderlyingProperties();
- addProperty(metadata, OfficeOpenXMLCore.CATEGORY, propsHolder.getCategoryProperty());
- addProperty(metadata, OfficeOpenXMLCore.CONTENT_STATUS, propsHolder
+ setProperty(metadata, OfficeOpenXMLCore.CATEGORY, propsHolder.getCategoryProperty());
+ setProperty(metadata, OfficeOpenXMLCore.CONTENT_STATUS, propsHolder
.getContentStatusProperty());
- addProperty(metadata, TikaCoreProperties.CREATED, propsHolder
+ setProperty(metadata, TikaCoreProperties.CREATED, propsHolder
.getCreatedProperty());
addMultiProperty(metadata, TikaCoreProperties.CREATOR, propsHolder
.getCreatorProperty());
- addProperty(metadata, TikaCoreProperties.DESCRIPTION, propsHolder
+ setProperty(metadata, TikaCoreProperties.DESCRIPTION, propsHolder
.getDescriptionProperty());
- addProperty(metadata, TikaCoreProperties.IDENTIFIER, propsHolder
+ setProperty(metadata, TikaCoreProperties.IDENTIFIER, propsHolder
.getIdentifierProperty());
- addProperty(metadata, TikaCoreProperties.KEYWORDS, propsHolder
+ addProperty(metadata, OfficeOpenXMLCore.SUBJECT,
+ propsHolder.getSubjectProperty());
+ addProperty(metadata, Office.KEYWORDS, propsHolder
.getKeywordsProperty());
- addProperty(metadata, TikaCoreProperties.LANGUAGE, propsHolder
+ setProperty(metadata, TikaCoreProperties.LANGUAGE, propsHolder
.getLanguageProperty());
- addProperty(metadata, TikaCoreProperties.MODIFIER, propsHolder
+ setProperty(metadata, TikaCoreProperties.MODIFIER, propsHolder
.getLastModifiedByProperty());
- addProperty(metadata, TikaCoreProperties.PRINT_DATE, propsHolder
+ setProperty(metadata, TikaCoreProperties.PRINT_DATE, propsHolder
.getLastPrintedProperty());
- addProperty(metadata, Metadata.LAST_MODIFIED, propsHolder
- .getModifiedProperty());
- addProperty(metadata, TikaCoreProperties.MODIFIED, propsHolder
+ setProperty(metadata, TikaCoreProperties.MODIFIED, propsHolder
.getModifiedProperty());
- addProperty(metadata, OfficeOpenXMLCore.REVISION, propsHolder
+ setProperty(metadata, OfficeOpenXMLCore.REVISION, propsHolder
.getRevisionProperty());
- // TODO: Move to OO subject in Tika 2.0
- addProperty(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT,
- propsHolder.getSubjectProperty());
- addProperty(metadata, TikaCoreProperties.TITLE, propsHolder.getTitleProperty());
- addProperty(metadata, OfficeOpenXMLCore.VERSION, propsHolder.getVersionProperty());
- // Legacy Tika-1.0 style stats
- // TODO Remove these in Tika 2.0
- addProperty(metadata, Metadata.CATEGORY, propsHolder.getCategoryProperty());
- addProperty(metadata, Metadata.CONTENT_STATUS, propsHolder
- .getContentStatusProperty());
- addProperty(metadata, Metadata.REVISION_NUMBER, propsHolder
- .getRevisionProperty());
- addProperty(metadata, Metadata.VERSION, propsHolder.getVersionProperty());
+ setProperty(metadata, TikaCoreProperties.TITLE, propsHolder.getTitleProperty());
+ setProperty(metadata, OfficeOpenXMLCore.VERSION, propsHolder.getVersionProperty());
+
}
private void extractMetadata(ExtendedProperties properties,
@@ -130,15 +119,15 @@ public class MetadataExtractor {
} catch (XmlValueOutOfRangeException e) {
//swallow for now
}
- addProperty(metadata, OfficeOpenXMLExtended.APPLICATION, propsHolder.getApplication());
- addProperty(metadata, OfficeOpenXMLExtended.APP_VERSION, propsHolder.getAppVersion());
- addProperty(metadata, TikaCoreProperties.PUBLISHER, propsHolder.getCompany());
- addProperty(metadata, OfficeOpenXMLExtended.COMPANY, propsHolder.getCompany());
+ setProperty(metadata, OfficeOpenXMLExtended.APPLICATION, propsHolder.getApplication());
+ setProperty(metadata, OfficeOpenXMLExtended.APP_VERSION, propsHolder.getAppVersion());
+ setProperty(metadata, TikaCoreProperties.PUBLISHER, propsHolder.getCompany());
+ setProperty(metadata, OfficeOpenXMLExtended.COMPANY, propsHolder.getCompany());
SummaryExtractor.addMulti(metadata, OfficeOpenXMLExtended.MANAGER, propsHolder.getManager());
- addProperty(metadata, OfficeOpenXMLExtended.NOTES, propsHolder.getNotes());
- addProperty(metadata, OfficeOpenXMLExtended.PRESENTATION_FORMAT, propsHolder.getPresentationFormat());
- addProperty(metadata, OfficeOpenXMLExtended.TEMPLATE, propsHolder.getTemplate());
- addProperty(metadata, OfficeOpenXMLExtended.TOTAL_TIME, totalTime);
+ setProperty(metadata, OfficeOpenXMLExtended.NOTES, propsHolder.getNotes());
+ setProperty(metadata, OfficeOpenXMLExtended.PRESENTATION_FORMAT, propsHolder.getPresentationFormat());
+ setProperty(metadata, OfficeOpenXMLExtended.TEMPLATE, propsHolder.getTemplate());
+ setProperty(metadata, OfficeOpenXMLExtended.TOTAL_TIME, totalTime);
if (propsHolder.getPages() > 0) {
metadata.set(PagedText.N_PAGES, propsHolder.getPages());
@@ -147,30 +136,13 @@ public class MetadataExtractor {
}
// Process the document statistics
- addProperty(metadata, Office.PAGE_COUNT, propsHolder.getPages());
- addProperty(metadata, Office.SLIDE_COUNT, propsHolder.getSlides());
- addProperty(metadata, Office.PARAGRAPH_COUNT, propsHolder.getParagraphs());
- addProperty(metadata, Office.LINE_COUNT, propsHolder.getLines());
- addProperty(metadata, Office.WORD_COUNT, propsHolder.getWords());
- addProperty(metadata, Office.CHARACTER_COUNT, propsHolder.getCharacters());
- addProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
-
- // Legacy Tika-1.0 style stats
- // TODO Remove these in Tika 2.0
- addProperty(metadata, Metadata.APPLICATION_NAME, propsHolder.getApplication());
- addProperty(metadata, Metadata.APPLICATION_VERSION, propsHolder.getAppVersion());
- addProperty(metadata, Metadata.MANAGER, propsHolder.getManager());
- addProperty(metadata, Metadata.NOTES, propsHolder.getNotes());
- addProperty(metadata, Metadata.PRESENTATION_FORMAT, propsHolder.getPresentationFormat());
- addProperty(metadata, Metadata.TEMPLATE, propsHolder.getTemplate());
- addProperty(metadata, Metadata.TOTAL_TIME, totalTime);
- addProperty(metadata, MSOffice.PAGE_COUNT, propsHolder.getPages());
- addProperty(metadata, MSOffice.SLIDE_COUNT, propsHolder.getSlides());
- addProperty(metadata, MSOffice.PARAGRAPH_COUNT, propsHolder.getParagraphs());
- addProperty(metadata, MSOffice.LINE_COUNT, propsHolder.getLines());
- addProperty(metadata, MSOffice.WORD_COUNT, propsHolder.getWords());
- addProperty(metadata, MSOffice.CHARACTER_COUNT, propsHolder.getCharacters());
- addProperty(metadata, MSOffice.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
+ setProperty(metadata, Office.PAGE_COUNT, propsHolder.getPages());
+ setProperty(metadata, Office.SLIDE_COUNT, propsHolder.getSlides());
+ setProperty(metadata, Office.PARAGRAPH_COUNT, propsHolder.getParagraphs());
+ setProperty(metadata, Office.LINE_COUNT, propsHolder.getLines());
+ setProperty(metadata, Office.WORD_COUNT, propsHolder.getWords());
+ setProperty(metadata, Office.CHARACTER_COUNT, propsHolder.getCharacters());
+ setProperty(metadata, Office.CHARACTER_COUNT_WITH_SPACES, propsHolder.getCharactersWithSpaces());
}
private void extractMetadata(CustomProperties properties,
@@ -257,7 +229,7 @@ public class MetadataExtractor {
}
}
- private <T> void addProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
+ private <T> void setProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
T value = nullableValue.getValue();
if (value != null) {
if (value instanceof Date) {
@@ -272,31 +244,41 @@ public class MetadataExtractor {
}
}
- private void addProperty(Metadata metadata, String name, Nullable<?> value) {
+ private <T> void addProperty(Metadata metadata, Property property, Nullable<T> nullableValue) {
+ T value = nullableValue.getValue();
+ if (value != null) {
+ if (value instanceof String) {
+ metadata.add(property, (String) value);
+ } else {
+ throw new IllegalArgumentException("Can't add property of class: "+nullableValue.getClass());
+ }
+ }
+ }
+ private void setProperty(Metadata metadata, String name, Nullable<?> value) {
if (value.getValue() != null) {
- addProperty(metadata, name, value.getValue().toString());
+ setProperty(metadata, name, value.getValue().toString());
}
}
- private void addProperty(Metadata metadata, Property property, String value) {
+ private void setProperty(Metadata metadata, Property property, String value) {
if (value != null) {
metadata.set(property, value);
}
}
- private void addProperty(Metadata metadata, String name, String value) {
+ private void setProperty(Metadata metadata, String name, String value) {
if (value != null) {
metadata.set(name, value);
}
}
- private void addProperty(Metadata metadata, Property property, int value) {
+ private void setProperty(Metadata metadata, Property property, int value) {
if (value > 0) {
metadata.set(property, value);
}
}
- private void addProperty(Metadata metadata, String name, int value) {
+ private void setProperty(Metadata metadata, String name, int value) {
if (value > 0) {
metadata.set(name, Integer.toString(value));
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
index 0a511c2..dcde62b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFBExcelExtractorDecorator.java
@@ -47,7 +47,6 @@ import org.apache.poi.xssf.usermodel.XSSFSimpleShape;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlException;
@@ -80,7 +79,7 @@ public class XSSFBExcelExtractorDecorator extends XSSFExcelExtractorDecorator {
this.metadata = metadata;
this.parseContext = context;
- metadata.set(TikaMetadataKeys.PROTECTED, "false");
+ metadata.set(TikaCoreProperties.PROTECTED, "false");
super.getXHTML(handler, metadata, context);
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index c3b7285..9a2b017 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -55,7 +55,6 @@ import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.TikaExcelDataFormatter;
import org.apache.tika.sax.OfflineContentHandler;
@@ -115,7 +114,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
this.metadata = metadata;
this.parseContext = context;
- metadata.set(TikaMetadataKeys.PROTECTED, "false");
+ metadata.set(TikaCoreProperties.PROTECTED, "false");
super.getXHTML(handler, metadata, context);
}
@@ -345,7 +344,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor {
sheetInputStream.close();
if (handler.hasProtection) {
- metadata.set(TikaMetadataKeys.PROTECTED, "true");
+ metadata.set(TikaCoreProperties.PROTECTED, "true");
}
} catch (TikaException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
index e9890c8..a101574 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/xml/WordMLParser.java
@@ -281,7 +281,7 @@ public class WordMLParser extends AbstractXML2003Parser {
try (TikaInputStream is = TikaInputStream.get(rawBytes)) {
Metadata metadata = new Metadata();
if (pictName != null) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, pictName);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, pictName);
}
if (pictSource != null) {
metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, pictSource);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
index 47e2f3d..7baae65 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
@@ -168,7 +168,7 @@ public class MP4Parser extends AbstractParser {
MovieHeaderBox mHeader = getOrNull(moov, MovieHeaderBox.class);
if (mHeader != null) {
// Get the creation and modification dates
- metadata.set(Metadata.CREATION_DATE, mHeader.getCreationTime());
+ metadata.set(TikaCoreProperties.CREATED, mHeader.getCreationTime());
metadata.set(TikaCoreProperties.MODIFIED, mHeader.getModificationTime());
// Get the duration
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
index 14b9674..6d84055 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentMetaParser.java
@@ -18,13 +18,16 @@ package org.apache.tika.parser.odf;
import java.io.IOException;
import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
-import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -54,15 +57,6 @@ public class OpenDocumentMetaParser extends XMLParser {
private static final String META_NS = "urn:oasis:names:tc:opendocument:xmlns:meta:1.0";
private static final XPathParser META_XPATH = new XPathParser("meta", META_NS);
- /**
- * @see OfficeOpenXMLCore#SUBJECT
- * @deprecated use OfficeOpenXMLCore#SUBJECT
- */
- @Deprecated
- private static final Property TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR =
- Property.composite(Office.INITIAL_AUTHOR,
- new Property[]{Property.externalText("initial-creator")});
-
private static ContentHandler getDublinCoreHandler(
Metadata metadata, Property property, String element) {
return new ElementMetadataHandler(
@@ -87,7 +81,7 @@ public class OpenDocumentMetaParser extends XMLParser {
META_XPATH.parse("//meta:user-defined//text()"));
// eg <meta:user-defined meta:name="Info1">Text1</meta:user-defined> becomes custom:Info1=Text1
ContentHandler branch = new MatchingContentHandler(
- new AttributeDependantMetadataHandler(md, "meta:name", Metadata.USER_DEFINED_METADATA_NAME_PREFIX),
+ new AttributeDependantMetadataHandler(md, "meta:name", Office.USER_DEFINED_METADATA_NAME_PREFIX),
matcher);
return new TeeContentHandler(ch, branch);
}
@@ -136,12 +130,13 @@ public class OpenDocumentMetaParser extends XMLParser {
// ODF uses dc:subject for description
ch = new TeeContentHandler(ch, new ElementMetadataHandler(
DublinCore.NAMESPACE_URI_DC, "subject",
- md, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT));
- ch = getMeta(ch, md, TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT, "keyword");
+ md, OfficeOpenXMLCore.SUBJECT));
+
+ ch = getMeta(ch, md, Office.KEYWORDS, "keyword");
- ch = getMeta(ch, md, Property.externalText(MSOffice.EDIT_TIME), "editing-duration");
+ ch = getMeta(ch, md, OfficeOpenXMLExtended.TOTAL_TIME, "editing-duration");
ch = getMeta(ch, md, Property.externalText("editing-cycles"), "editing-cycles");
- ch = getMeta(ch, md, TRANSITION_INITIAL_CREATOR_TO_INITIAL_AUTHOR, "initial-creator");
+ ch = getMeta(ch, md, TikaCoreProperties.CREATOR, "initial-creator");
ch = getMeta(ch, md, Property.externalText("generator"), "generator");
// Process the user defined Meta Attributes
@@ -157,25 +152,6 @@ public class OpenDocumentMetaParser extends XMLParser {
ch = getStatistic(ch, md, Office.WORD_COUNT, "word-count");
ch = getStatistic(ch, md, Office.CHARACTER_COUNT, "character-count");
- // Legacy, Tika-1.0 style attributes
- // TODO Remove these in Tika 2.0
- ch = getStatistic(ch, md, MSOffice.OBJECT_COUNT, "object-count");
- ch = getStatistic(ch, md, MSOffice.IMAGE_COUNT, "image-count");
- ch = getStatistic(ch, md, MSOffice.PAGE_COUNT, "page-count");
- ch = getStatistic(ch, md, MSOffice.TABLE_COUNT, "table-count");
- ch = getStatistic(ch, md, MSOffice.PARAGRAPH_COUNT, "paragraph-count");
- ch = getStatistic(ch, md, MSOffice.WORD_COUNT, "word-count");
- ch = getStatistic(ch, md, MSOffice.CHARACTER_COUNT, "character-count");
-
- // Legacy Statistics Attributes, replaced with real keys above
- // TODO Remove these shortly, eg after Tika 1.1 (TIKA-770)
- ch = getStatistic(ch, md, "nbPage", "page-count");
- ch = getStatistic(ch, md, "nbPara", "paragraph-count");
- ch = getStatistic(ch, md, "nbWord", "word-count");
- ch = getStatistic(ch, md, "nbCharacter", "character-count");
- ch = getStatistic(ch, md, "nbTab", "table-count");
- ch = getStatistic(ch, md, "nbObject", "object-count");
- ch = getStatistic(ch, md, "nbImg", "image-count");
// Normalise the rest
ch = new NSNormalizerContentHandler(ch);
@@ -194,6 +170,22 @@ public class OpenDocumentMetaParser extends XMLParser {
(metadata.get(TikaCoreProperties.DESCRIPTION) == null || metadata.get(TikaCoreProperties.DESCRIPTION).equals(""))) {
metadata.set(TikaCoreProperties.DESCRIPTION, odfSubject);
}
+ //reset the dc:subject to include both keywords and subject
+ //We can't rely on composite keys in the MatchingContentHandlers
+ //because those are "setting" not "adding" to the Metadata object
+ List<String> subjects = new ArrayList<>();
+ if (metadata.getValues(Office.KEYWORDS) != null) {
+ subjects.addAll(Arrays.asList(metadata.getValues(Office.KEYWORDS)));
+ }
+
+ if (metadata.getValues(OfficeOpenXMLCore.SUBJECT) != null) {
+ subjects.addAll(Arrays.asList(metadata.getValues(OfficeOpenXMLCore.SUBJECT)));
+ }
+
+ if (subjects.size() > 0) {
+ metadata.set(TikaCoreProperties.SUBJECT,
+ subjects.toArray(new String[subjects.size()]));
+ }
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
index e7eb76a..c8aa65e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
@@ -36,7 +36,6 @@ import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -239,7 +238,7 @@ public class OpenDocumentParser extends AbstractParser {
TikaCoreProperties.EmbeddedResourceType.THUMBNAIL);
}*/
if (embeddedName.contains("Pictures/")) {
- embeddedMetadata.set(TikaMetadataKeys.EMBEDDED_RESOURCE_TYPE,
+ embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
}
if (embeddedDocumentExtractor.shouldParseEmbedded(embeddedMetadata)) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index fc46136..f086d6e 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -256,7 +256,7 @@ class AbstractPDF2XHTML extends PDFTextStripper {
// TODO: other metadata?
Metadata embeddedMetadata = new Metadata();
- embeddedMetadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
+ embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileName);
embeddedMetadata.set(Metadata.CONTENT_TYPE, file.getSubtype());
embeddedMetadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.getSize()));
embeddedMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
index c67a7bb..1602a08 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
@@ -226,7 +226,7 @@ class PDF2XHTML extends AbstractPDF2XHTML {
imageNumber = inlineImageCounter++;
}
String fileName = "image" + imageNumber + "."+extension;
- embeddedMetadata.set(Metadata.RESOURCE_NAME_KEY, fileName);
+ embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileName);
// Output the img tag
AttributesImpl attr = new AttributesImpl();
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 303d8e6..b2a15ca 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -58,6 +58,8 @@ import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.AccessPermissions;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.PDF;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.Property;
@@ -249,24 +251,21 @@ public class PDFParser extends AbstractParser implements Initializable {
extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema);
addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator());
addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator());
- addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords());
+ addMetadata(metadata, Office.KEYWORDS, info.getKeywords());
addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords());
- addMetadata(metadata, "producer", info.getProducer());
addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer());
extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema);
addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject());
- // TODO: Move to description in Tika 2.0
- addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject());
- addMetadata(metadata, "trapped", info.getTrapped());
+ addMetadata(metadata, TikaCoreProperties.SUBJECT, info.getKeywords());
+ addMetadata(metadata, TikaCoreProperties.SUBJECT, info.getSubject());
+ addMetadata(metadata, OfficeOpenXMLCore.SUBJECT, info.getSubject());
+
addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped());
- // TODO Remove these in Tika 2.0
- addMetadata(metadata, "created", info.getCreationDate());
addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate());
addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate());
Calendar modified = info.getModificationDate();
- addMetadata(metadata, Metadata.LAST_MODIFIED, modified);
addMetadata(metadata, TikaCoreProperties.MODIFIED, modified);
addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, info.getModificationDate());
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
index 48f8bec..3ffaf66 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/CompressorParser.java
@@ -44,6 +44,7 @@ import org.apache.tika.exception.TikaMemoryLimitException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
@@ -193,7 +194,7 @@ public class CompressorParser extends AbstractParser {
try {
Metadata entrydata = new Metadata();
- String name = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (name != null) {
if (name.endsWith(".tbz")) {
name = name.substring(0, name.length() - 4) + ".tar";
@@ -212,7 +213,7 @@ public class CompressorParser extends AbstractParser {
} else if (name.length() > 0) {
name = GzipUtils.getUncompressedFilename(name);
}
- entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
+ entrydata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
}
// Use the delegate parser to parse the compressed document
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
index 6ec201e..14dd588 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
@@ -382,14 +382,14 @@ public class PackageParser extends AbstractParser {
}
if (name != null && name.length() > 0) {
name = name.replace("\\", "/");
- entrydata.set(Metadata.RESOURCE_NAME_KEY, name);
+ entrydata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute("", "class", "class", "CDATA", "embedded");
attributes.addAttribute("", "id", "id", "CDATA", name);
xhtml.startElement("div", attributes);
xhtml.endElement("div");
- entrydata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, name);
+ entrydata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, name);
}
return entrydata;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
index ddb45f6..a7c8bd0 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
@@ -89,7 +89,7 @@ public class PRTParser extends AbstractParser {
dateStr.substring(10, 12) + ":00";
metadata.set(TikaCoreProperties.CREATED, formattedDate);
// TODO Metadata.DATE is used as modified, should it be here?
- metadata.set(Metadata.DATE, formattedDate);
+ metadata.set(TikaCoreProperties.CREATED, formattedDate);
}
metadata.set(Metadata.CONTENT_TYPE, PRT_MIME_TYPE);
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
index 26f0084..e2ef83c 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFEmbObjHandler.java
@@ -184,8 +184,8 @@ class RTFEmbObjHandler {
} else if (state == EMB_STATE.PICT) {
String filePath = metadata.get(RTFMetadata.RTF_PICT_META_PREFIX + "wzDescription");
if (filePath != null && filePath.length() > 0) {
- metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, filePath);
- metadata.set(Metadata.RESOURCE_NAME_KEY, FilenameUtils.getName(filePath));
+ metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, filePath);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, FilenameUtils.getName(filePath));
metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, filePath);
}
metadata.set(RTFMetadata.THUMBNAIL, Boolean.toString(inObject));
@@ -208,13 +208,13 @@ class RTFEmbObjHandler {
if (embeddedDocumentUtil.shouldParseEmbedded(metadata)) {
TikaInputStream stream = TikaInputStream.get(bytes);
- if (metadata.get(Metadata.RESOURCE_NAME_KEY) == null) {
+ if (metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY) == null) {
String extension = embeddedDocumentUtil.getExtension(stream, metadata);
if (inObject && state == EMB_STATE.PICT) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, "thumbnail_" + thumbCount++ + extension);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "thumbnail_" + thumbCount++ + extension);
metadata.set(RTFMetadata.THUMBNAIL, "true");
} else {
- metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() +
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() +
extension);
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
index a43c789..0b3322b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFObjDataParser.java
@@ -185,7 +185,7 @@ class RTFObjDataParser {
is.reset();
IOUtils.copy(is, out);
ret = out.toByteArray();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "file_" + unknownFilenameCount.getAndIncrement() + "." + type.getExtension());
metadata.set(Metadata.CONTENT_TYPE, type.getType().toString());
}
}
@@ -258,8 +258,8 @@ class RTFObjDataParser {
pathToUse = ansiFilePath == null ? "" : ansiFilePath;
}
metadata.set(TikaCoreProperties.ORIGINAL_RESOURCE_NAME, fileNameToUse);
- metadata.set(Metadata.RESOURCE_NAME_KEY, FilenameUtils.getName(fileNameToUse));
- metadata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, pathToUse);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, FilenameUtils.getName(fileNameToUse));
+ metadata.set(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID, pathToUse);
return objBytes;
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
index a0dac99..c2e8294 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
@@ -1049,10 +1049,9 @@ final class TextExtractor {
} else if (equals("title")) {
nextMetaData = TikaCoreProperties.TITLE;
} else if (equals("subject")) {
- // TODO: Move to OO subject in Tika 2.0
- nextMetaData = TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT;
+ nextMetaData = OfficeOpenXMLCore.SUBJECT;
} else if (equals("keywords")) {
- nextMetaData = TikaCoreProperties.TRANSITION_KEYWORDS_TO_DC_SUBJECT;
+ nextMetaData = Office.KEYWORDS;
} else if (equals("category")) {
nextMetaData = OfficeOpenXMLCore.CATEGORY;
} else if (equals("comment")) {
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
index a467651..58ee184 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/utils/CommonsDigester.java
@@ -59,7 +59,7 @@ public class CommonsDigester extends CompositeDigester {
}
String getMetadataKey() {
return TikaCoreProperties.TIKA_META_PREFIX +
- "digest" + Metadata.NAMESPACE_PREFIX_DELIMITER + this.toString();
+ "digest" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + this.toString();
}
}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java
index 5999773..436e26a 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java
@@ -44,7 +44,7 @@ public class DcXMLParser extends XMLParser {
return new TeeContentHandler(
super.getContentHandler(handler, metadata, context),
getDublinCoreHandler(metadata, TikaCoreProperties.TITLE, "title"),
- getDublinCoreHandler(metadata, TikaCoreProperties.KEYWORDS, "subject"),
+ getDublinCoreHandler(metadata, TikaCoreProperties.SUBJECT, "subject"),
getDublinCoreHandler(metadata, TikaCoreProperties.CREATOR, "creator"),
getDublinCoreHandler(metadata, TikaCoreProperties.DESCRIPTION, "description"),
getDublinCoreHandler(metadata, TikaCoreProperties.PUBLISHER, "publisher"),
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java
index bf06a08..0f3659b 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/xml/FictionBookParser.java
@@ -25,7 +25,7 @@ import org.apache.commons.codec.binary.Base64;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.Attributes;
@@ -71,7 +71,7 @@ public class FictionBookParser extends XMLParser {
binaryData.setLength(0);
metadata = new Metadata();
- metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, attributes.getValue(ATTRIBUTE_ID));
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, attributes.getValue(ATTRIBUTE_ID));
metadata.set(Metadata.CONTENT_TYPE, attributes.getValue(ATTRIBUTE_CONTENT_TYPE));
}
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
index e4117c4..ab7a9e4 100644
--- a/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
+++ b/tika-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java
@@ -29,6 +29,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.iwork.iwana.IWork13PackageParser;
@@ -60,7 +61,7 @@ public class TestContainerAwareDetector {
TestContainerAwareDetector.class.getResource("/test-documents/" + dataFile))) {
Metadata m = new Metadata();
if (name != null)
- m.add(Metadata.RESOURCE_NAME_KEY, name);
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, name);
// Mime Magic version is likely to be less precise
if (typeFromMagic != null) {
@@ -410,7 +411,7 @@ public class TestContainerAwareDetector {
// With truncated data + filename, we can use the filename to specialise
m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "testEXCEL.xlsx");
try (TikaInputStream xlsx = getTruncatedFile("testEXCEL.xlsx", 300)) {
assertEquals(
MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet"),
@@ -427,7 +428,7 @@ public class TestContainerAwareDetector {
// Finally a truncated OLE2 file, with a filename available
m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "testEXCEL.xls");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "testEXCEL.xls");
try (TikaInputStream xls = getTruncatedFile("testEXCEL.xls", 400)) {
assertEquals(
MediaType.application("vnd.ms-excel"),
diff --git a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 1974b5f..da0e311 100644
--- a/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -33,6 +33,7 @@ import java.net.URL;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.junit.Before;
import org.junit.Test;
@@ -1185,7 +1186,7 @@ public class TestMimeTypes {
"/test-documents/" + filename)) {
assertNotNull("Test file not found: " + filename, stream);
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
assertEquals(expected, repo.detect(stream, metadata).toString());
}
}
@@ -1193,7 +1194,7 @@ public class TestMimeTypes {
private void assertTypeByName(String expected, String filename)
throws IOException {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
assertEquals(expected, repo.detect(null, metadata).toString());
}
@@ -1237,7 +1238,7 @@ public class TestMimeTypes {
"/test-documents/" + filename)) {
assertNotNull("Test document not found: " + filename, stream);
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
return repo.detect(stream, metadata);
}
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
index a46564e..10d2a0f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
@@ -91,7 +91,7 @@ public class AutoDetectParserTest extends TikaTest {
+ tp.resourceRealName);
}
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, tp.resourceStatedName);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, tp.resourceStatedName);
metadata.set(Metadata.CONTENT_TYPE, tp.statedType);
ContentHandler handler = new BodyContentHandler();
new AutoDetectParser(tika).parse(input, handler, metadata);
@@ -345,8 +345,8 @@ public class AutoDetectParserTest extends TikaTest {
// Check some of the common metadata
// Old style metadata
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
- assertEquals("Test Title", metadata.get(Metadata.TITLE));
+ assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
// New style metadata
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
@@ -401,7 +401,7 @@ public class AutoDetectParserTest extends TikaTest {
for (int i = 0; i < exts.length; i++) {
Metadata m = new Metadata();
- m.set(Metadata.RESOURCE_NAME_KEY, "file." + exts[i]);
+ m.set(TikaCoreProperties.RESOURCE_NAME_KEY, "file." + exts[i]);
try {
getXML(TikaInputStream.get(new byte[0]), new AutoDetectParser(), m);
fail("should have thrown zero byte exception");
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java
index 1d2861b..836e464 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/BouncyCastleDigestingParserTest.java
@@ -45,7 +45,7 @@ import org.junit.Test;
public class BouncyCastleDigestingParserTest extends TikaTest {
private final static String P = TikaCoreProperties.TIKA_META_PREFIX+
- "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ "digest"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
private final static int UNLIMITED = 1000000;//well, not really, but longer than input file
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
index 931718e..6d2739e 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/DigestingParserTest.java
@@ -45,7 +45,7 @@ import org.junit.Test;
public class DigestingParserTest extends TikaTest {
private final static String P = TikaCoreProperties.TIKA_META_PREFIX+
- "digest"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+ "digest"+TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER;
private final static int UNLIMITED = 1000000;//well, not really, but longer than input file
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index 4889b38..1a853d9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -32,7 +32,7 @@ import org.apache.commons.io.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.utils.CommonsDigester;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.sax.ContentHandlerFactory;
@@ -179,7 +179,7 @@ public class RecursiveParserWrapperTest {
targets.add("/image1.emf");
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
List<Metadata> list = getMetadata(metadata,
new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.XML, -1));
Metadata container = list.get(0);
@@ -199,7 +199,7 @@ public class RecursiveParserWrapperTest {
@Test
public void testEmbeddedNPE() throws Exception {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded_npe.docx");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test_recursive_embedded_npe.docx");
List<Metadata> list = getMetadata(metadata,
new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1));
//default behavior (user doesn't specify whether or not to catch embedded exceptions
@@ -209,7 +209,7 @@ public class RecursiveParserWrapperTest {
assertContains("java.lang.NullPointerException", mockNPEMetadata.get(RecursiveParserWrapper.EMBEDDED_EXCEPTION));
metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded_npe.docx");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test_recursive_embedded_npe.docx");
list = getMetadata(metadata,
new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
false, null);
@@ -226,7 +226,7 @@ public class RecursiveParserWrapperTest {
//that the first element of the returned list is the container document
//and the second is the embedded content
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "embedded_then_npe.xml");
ParseContext context = new ParseContext();
Parser wrapped = new AutoDetectParser();
@@ -254,18 +254,18 @@ public class RecursiveParserWrapperTest {
Metadata outerMetadata = metadataList.get(0);
Metadata embeddedMetadata = metadataList.get(1);
assertContains("main_content", outerMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("embedded_then_npe.xml", outerMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
+ assertEquals("embedded_then_npe.xml", outerMetadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals("Nikolai Lobachevsky", outerMetadata.get("author"));
assertContains("some_embedded_content", embeddedMetadata.get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("embed1.xml", embeddedMetadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY));
+ assertEquals("embed1.xml", embeddedMetadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals("embeddedAuthor", embeddedMetadata.get("author"));
}
@Test
public void testDigesters() throws Exception {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, "test_recursive_embedded.docx");
List<Metadata> list = getMetadata(metadata,
new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, -1),
true, new CommonsDigester(100000, CommonsDigester.DigestAlgorithm.MD5));
@@ -288,7 +288,7 @@ public class RecursiveParserWrapperTest {
}
RecursiveParserWrapper wrapper = new RecursiveParserWrapper(wrapped,
contentHandlerFactory, catchEmbeddedExceptions);
- String path = metadata.get(Metadata.RESOURCE_NAME_KEY);
+ String path = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
if (path == null) {
path = "/test-documents/test_recursive_embedded.docx";
} else {
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
index 3a8a66c..f864ee7 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java
@@ -38,7 +38,7 @@ public class ClassParserTest {
assertEquals("AutoDetectParser", metadata.get(TikaCoreProperties.TITLE));
assertEquals(
"AutoDetectParser.class",
- metadata.get(Metadata.RESOURCE_NAME_KEY));
+ metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertTrue(content.contains("package org.apache.tika.parser;"));
assertTrue(content.contains(
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
index 17aca8b..554b060 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/code/SourceCodeParserTest.java
@@ -93,7 +93,7 @@ public class SourceCodeParserTest extends TikaTest {
private Metadata createMetadata(String mimeType) {
Metadata metadata = new Metadata();
- metadata.add(Metadata.RESOURCE_NAME_KEY, "testFile");
+ metadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, "testFile");
metadata.add(Metadata.CONTENT_TYPE, mimeType);
return metadata;
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
index e92ae44..56b2787 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertNull;
import static org.apache.tika.TikaTest.assertContains;
import java.io.InputStream;
+import java.util.Arrays;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -112,22 +113,14 @@ public class DWGParserTest {
metadata.get(TikaCoreProperties.TITLE));
assertEquals("Gym class featuring a brown fox and lazy dog",
metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(Metadata.SUBJECT));
assertEquals("Nevin Nollop",
metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Pangram, fox, dog",
- metadata.get(TikaCoreProperties.KEYWORDS));
+ assertContains("Pangram, fox, dog",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
assertEquals("Lorem ipsum",
metadata.get(TikaCoreProperties.COMMENTS).substring(0,11));
assertEquals("http://www.alfresco.com",
metadata.get(TikaCoreProperties.RELATION));
-
- // Check some of the old style metadata too
- assertEquals("The quick brown fox jumps over the lazy dog",
- metadata.get(Metadata.TITLE));
- assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(Metadata.SUBJECT));
String content = handler.toString();
assertContains("The quick brown fox jumps over the lazy dog", content);
@@ -149,9 +142,8 @@ public class DWGParserTest {
assertNull(metadata.get(TikaCoreProperties.TITLE));
assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
- assertNull(metadata.get(Metadata.SUBJECT));
assertNull(metadata.get(TikaCoreProperties.CREATOR));
- assertNull(metadata.get(TikaCoreProperties.KEYWORDS));
+ assertNull(metadata.get(TikaCoreProperties.SUBJECT));
assertNull(metadata.get(TikaCoreProperties.COMMENTS));
assertNull(metadata.get(TikaCoreProperties.RELATION));
@@ -175,18 +167,14 @@ public class DWGParserTest {
metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Subject",
metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Test Subject",
- metadata.get(Metadata.SUBJECT));
assertEquals("My Author",
metadata.get(TikaCoreProperties.CREATOR));
assertEquals("My keyword1, MyKeyword2",
- metadata.get(TikaCoreProperties.KEYWORDS));
+ metadata.get(TikaCoreProperties.SUBJECT));
assertEquals("This is a comment",
metadata.get(TikaCoreProperties.COMMENTS));
assertEquals("bejanpol",
metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("bejanpol",
- metadata.get(Metadata.LAST_AUTHOR));
assertEquals("http://mycompany/drawings",
metadata.get(TikaCoreProperties.RELATION));
assertEquals("MyCustomPropertyValue",
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/executable/ExecutableParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/executable/ExecutableParserTest.java
index e7bd24d..c56c7f2 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/executable/ExecutableParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/executable/ExecutableParserTest.java
@@ -22,6 +22,7 @@ import java.util.Arrays;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.junit.Test;
public class ExecutableParserTest extends TikaTest {
@@ -33,7 +34,7 @@ public class ExecutableParserTest extends TikaTest {
assertEquals("application/x-msdownload", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("2012-05-13T13:40:11Z",
- metadata.get(Metadata.CREATION_DATE));
+ metadata.get(TikaCoreProperties.CREATED));
assertEquals(ExecutableParser.MACHINE_x86_32,
metadata.get(ExecutableParser.MACHINE_TYPE));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
index cb95b72..c35c782 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/font/FontParsersTest.java
@@ -57,7 +57,7 @@ public class FontParsersTest {
assertEquals("application/x-font-adobe-metric", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("TestFullName", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Fri Jul 15 17:50:51 2011", metadata.get(Metadata.CREATION_DATE));
+ assertEquals("Fri Jul 15 17:50:51 2011", metadata.get(TikaCoreProperties.CREATED));
assertEquals("TestFontName", metadata.get(MET_FONT_NAME));
assertEquals("TestFullName", metadata.get(MET_FONT_FULL_NAME));
@@ -92,7 +92,6 @@ public class FontParsersTest {
assertEquals("application/x-font-ttf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Open Sans Bold", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("2010-12-30T11:04:00Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2010-12-30T11:04:00Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals("2011-05-05T12:37:53Z", metadata.get(TikaCoreProperties.MODIFIED));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/BPGParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/BPGParserTest.java
index 699d656..75ac636 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/BPGParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/BPGParserTest.java
@@ -71,10 +71,9 @@ public class BPGParserTest {
if (false) {
assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue(keywords.contains("coast"));
assertTrue(keywords.contains("bird watching"));
- assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
}
// TODO Get the exif data to be properly extracted, see TIKA-1495
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
index fdac337..8d9b528 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
@@ -121,7 +121,7 @@ public class ImageMetadataExtractorTest {
when(t1.getTagName()).thenReturn("Image Description");
when(t1.getDescription()).thenReturn("t1");
Tag t2 = mock(Tag.class);
- when(t2.getTagName()).thenReturn(Metadata.KEYWORDS);
+ when(t2.getTagName()).thenReturn(TikaCoreProperties.SUBJECT.toString());
when(t2.getDescription()).thenReturn("known");
Tag t3 = mock(Tag.class);
when(t3.getTagName()).thenReturn(TikaCoreProperties.DESCRIPTION.getName());
@@ -132,7 +132,7 @@ public class ImageMetadataExtractorTest {
new ImageMetadataExtractor.CopyUnknownFieldsHandler().handle(d, metadata);
assertEquals("t1", metadata.get("Image Description"));
assertNull("keywords should be excluded from bulk copy because it is a defined field",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(TikaCoreProperties.SUBJECT));
assertNull(metadata.get(TikaCoreProperties.DESCRIPTION));
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
index 7e3a123..07d509f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
@@ -29,7 +29,7 @@ public class MetadataFieldsTest {
public void testIsMetadataField() {
assertFalse(MetadataFields.isMetadataField("random string that is not a field"));
assertFalse(MetadataFields.isMetadataField("xyz"));
- assertTrue(MetadataFields.isMetadataField(TikaCoreProperties.KEYWORDS));
+ assertTrue(MetadataFields.isMetadataField(TikaCoreProperties.SUBJECT));
assertTrue(MetadataFields.isMetadataField(TIFF.F_NUMBER.getName()));
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
index df6c2cb..6ffc8b9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
@@ -59,12 +59,9 @@ public class TiffParserTest extends TikaTest {
assertEquals("3", metadata.get(Metadata.SAMPLES_PER_PIXEL));
// Embedded XMP
- List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue("got " + keywords, keywords.contains("cat"));
assertTrue("got " + keywords, keywords.contains("garden"));
- List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue("got " + subject, subject.contains("cat"));
- assertTrue("got " + subject, subject.contains("garden"));
}
@Test
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
index cdbf5eb..0b94d77 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/image/xmp/JempboxExtractorTest.java
@@ -19,23 +19,18 @@ package org.apache.tika.parser.image.xmp;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import org.apache.tika.TikaTest;
-import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPMM;
-import org.apache.tika.parser.ParseContext;
import org.junit.Test;
-import javax.xml.parsers.DocumentBuilder;
-
public class JempboxExtractorTest extends TikaTest {
@Test
@@ -47,7 +42,7 @@ public class JempboxExtractorTest extends TikaTest {
metadata.set(TikaCoreProperties.DESCRIPTION, "old description");
metadata.set(TikaCoreProperties.CREATOR, "previous author");
// ... or kept in case the field is multi-value
- metadata.add(TikaCoreProperties.KEYWORDS, "oldkeyword");
+ metadata.add(TikaCoreProperties.SUBJECT, "oldkeyword");
JempboxExtractor extractor = new JempboxExtractor(metadata);
extractor.parse(stream);
@@ -56,18 +51,12 @@ public class JempboxExtractorTest extends TikaTest {
assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
- Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue(keywords.contains("oldkeyword"));
assertTrue(keywords.contains("grazelands"));
assertTrue(keywords.contains("nature reserve"));
assertTrue(keywords.contains("bird watching"));
assertTrue(keywords.contains("coast"));
- Collection<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue(subject.contains("oldkeyword"));
- assertTrue(subject.contains("grazelands"));
- assertTrue(subject.contains("nature reserve"));
- assertTrue(subject.contains("bird watching"));
- assertTrue(subject.contains("coast"));
}
@Test
@@ -82,7 +71,7 @@ public class JempboxExtractorTest extends TikaTest {
assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
- Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue(keywords.contains("bird watching"));
assertTrue(keywords.contains("coast"));
}
@@ -97,7 +86,7 @@ public class JempboxExtractorTest extends TikaTest {
// XnViewMp fields not understood by Jempbox
assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue(keywords.contains("coast"));
assertTrue(keywords.contains("nature reserve"));
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java
index 6cda282..6932dfe 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java
@@ -25,6 +25,8 @@ import java.util.Arrays;
import java.util.List;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
@@ -75,14 +77,14 @@ public class IWorkParserTest {
assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 6);
List<String> metadataKeys = Arrays.asList(metadata.names());
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
- assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.SLIDE_COUNT.getName()));
+ assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.SLIDE_COUNT.getName()));
// assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.SLIDE_COUNT.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
// Check the metadata values
assertEquals("application/vnd.apple.keynote", metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("3", metadata.get(Metadata.SLIDE_COUNT));
+ assertEquals("3", metadata.get(Office.SLIDE_COUNT));
assertEquals("1024", metadata.get(KeynoteContentHandler.PRESENTATION_WIDTH));
assertEquals("768", metadata.get(KeynoteContentHandler.PRESENTATION_HEIGHT));
assertEquals("Tika user", metadata.get(TikaCoreProperties.CREATOR));
@@ -172,20 +174,20 @@ public class IWorkParserTest {
assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 50);
List<String> metadataKeys = Arrays.asList(metadata.names());
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
- assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.PAGE_COUNT.getName()));
+ assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.PAGE_COUNT.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.LAST_MODIFIED.getName()));
- assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.LANGUAGE));
+ assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.LANGUAGE.getName()));
// Check the metadata values
assertEquals("application/vnd.apple.pages", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Tika user", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Apache tika", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("2010-05-09T21:34:38+0200", metadata.get(Metadata.CREATION_DATE));
- assertEquals("2010-05-09T23:50:36+0200", metadata.get(Metadata.LAST_MODIFIED));
+ assertEquals("2010-05-09T21:34:38+0200", metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("2010-05-09T23:50:36+0200", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("en", metadata.get(TikaCoreProperties.LANGUAGE));
- assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
+ assertEquals("2", metadata.get(Office.PAGE_COUNT));
String content = handler.toString();
@@ -238,14 +240,14 @@ public class IWorkParserTest {
assertTrue("Insufficient metadata found " + metadata.size(), metadata.size() >= 8);
List<String> metadataKeys = Arrays.asList(metadata.names());
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.CONTENT_TYPE));
- assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.PAGE_COUNT.getName()));
+ assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Office.PAGE_COUNT.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.CREATOR.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.COMMENTS.getName()));
- assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(Metadata.TITLE));
+ assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
assertTrue("Metadata not found in " + metadataKeys, metadataKeys.contains(TikaCoreProperties.TITLE.getName()));
// Check the metadata values
- assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
+ assertEquals("2", metadata.get(Office.PAGE_COUNT));
assertEquals("Tika User", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Account checking", metadata.get(TikaCoreProperties.TITLE));
assertEquals("a comment", metadata.get(TikaCoreProperties.COMMENTS));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
index e28921a..593e210 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/jdbc/SQLite3ParserTest.java
@@ -34,6 +34,7 @@ import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Database;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
@@ -65,7 +66,7 @@ public class SQLite3ParserTest extends TikaTest {
int tests = 0;
for (InputStream stream : streams) {
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, TEST_FILE_NAME);
//1) getXML closes the stream
//2) getXML runs recursively on the contents, so the embedded docs should show up
XMLResult result = getXML(stream, p, metadata);
@@ -104,7 +105,7 @@ public class SQLite3ParserTest extends TikaTest {
public void testSpacesInBodyContentHandler() throws Exception {
Parser p = new AutoDetectParser();
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, TEST_FILE_NAME);
ContentHandler handler = new BodyContentHandler(-1);
ParseContext ctx = new ParseContext();
ctx.set(Parser.class, p);
@@ -125,7 +126,7 @@ public class SQLite3ParserTest extends TikaTest {
ParseContext parseContext = new ParseContext();
parseContext.set(Parser.class, new EmptyParser());
try (InputStream is = getResourceAsStream(TEST_FILE1)) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, TEST_FILE_NAME);
p.parse(is, handler, metadata, parseContext);
}
String xml = handler.toString();
@@ -148,7 +149,7 @@ public class SQLite3ParserTest extends TikaTest {
BasicContentHandlerFactory.HANDLER_TYPE.BODY, -1));
Metadata metadata = new Metadata();
try (InputStream is = getResourceAsStream(TEST_FILE1)) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, TEST_FILE_NAME);
wrapper.parse(is, new BodyContentHandler(-1), metadata, new ParseContext());
}
List<Metadata> metadataList = wrapper.getMetadata();
@@ -182,7 +183,7 @@ public class SQLite3ParserTest extends TikaTest {
ByteCopyingHandler byteCopier = new ByteCopyingHandler();
Metadata metadata = new Metadata();
try (TikaInputStream is = TikaInputStream.get(getResourceAsStream(TEST_FILE1))) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, TEST_FILE_NAME);
ex.extract(is, ex, byteCopier);
}
assertEquals(4, byteCopier.bytes.size());
@@ -224,7 +225,7 @@ public class SQLite3ParserTest extends TikaTest {
ParserContainerExtractor ex = new ParserContainerExtractor();
InputStreamResettingHandler byteCopier = new InputStreamResettingHandler();
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, TEST_FILE_NAME);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, TEST_FILE_NAME);
try (InputStream is = getResourceAsStream(TEST_FILE1)) {
try (TikaInputStream tis = TikaInputStream.get(is)) {
ex.extract(tis, ex, byteCopier);
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
index d32dfc4..c710f23 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
@@ -92,16 +92,11 @@ public class JpegParserTest {
assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
"2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
- List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue("'canon-55-250' expected in " + keywords, keywords.contains("canon-55-250"));
assertTrue("'moscow-birds' expected in " + keywords, keywords.contains("moscow-birds"));
assertTrue("'serbor' expected in " + keywords, keywords.contains("serbor"));
assertFalse(keywords.contains("canon-55-250 moscow-birds serbor"));
- List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue("'canon-55-250' expected in " + subject, subject.contains("canon-55-250"));
- assertTrue("'moscow-birds' expected in " + subject, subject.contains("moscow-birds"));
- assertTrue("'serbor' expected in " + subject, subject.contains("serbor"));
- assertFalse(subject.contains("canon-55-250 moscow-birds serbor"));
}
/**
@@ -145,8 +140,7 @@ public class JpegParserTest {
"2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
assertEquals("Date/Time Original should be stored in EXIF field too",
"2009-08-11T09:09:45", metadata.get(TIFF.ORIGINAL_DATE));
- assertEquals("canon-55-250", metadata.getValues(TikaCoreProperties.KEYWORDS)[0]);
- assertEquals("canon-55-250", metadata.getValues(Metadata.KEYWORDS)[0]);
+ assertEquals("canon-55-250", metadata.getValues(TikaCoreProperties.SUBJECT)[0]);
}
/**
@@ -182,10 +176,9 @@ public class JpegParserTest {
// xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
// but we have to replace them with underscore
- List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue(keywords.contains("coast"));
assertTrue(keywords.contains("bird watching"));
- assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
// Core EXIF/TIFF tags
assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
@@ -218,10 +211,8 @@ public class JpegParserTest {
assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
- List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
assertTrue("got " + keywords, keywords.contains("bird watching"));
- List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue("got " + subject, subject.contains("bird watching"));
}
@Test
@@ -237,7 +228,7 @@ public class JpegParserTest {
assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
// xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
// but we have to replace them with underscore
- String[] subject = metadata.getValues(TikaCoreProperties.KEYWORDS);
+ String[] subject = metadata.getValues(TikaCoreProperties.SUBJECT);
List<String> keywords = Arrays.asList(subject);
assertTrue("'coast'" + " not in " + keywords, keywords.contains("coast"));
assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve"));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
index 7b48f13..20fa309 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
@@ -41,6 +41,7 @@ import java.util.List;
import java.util.Locale;
import org.apache.james.mime4j.stream.MimeConfig;
+import org.apache.tika.Tika;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
@@ -108,7 +109,7 @@ public class RFC822ParserTest extends TikaTest {
assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
metadata.get(TikaCoreProperties.TITLE));
assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
- metadata.get(Metadata.SUBJECT));
+ metadata.get(TikaCoreProperties.SUBJECT));
} catch (Exception e) {
fail("Exception thrown: " + e.getMessage());
}
@@ -235,7 +236,7 @@ public class RFC822ParserTest extends TikaTest {
assertEquals("If you can read this you understand the example.",
metadata.get(TikaCoreProperties.TITLE));
assertEquals("If you can read this you understand the example.",
- metadata.get(Metadata.SUBJECT));
+ metadata.get(TikaCoreProperties.SUBJECT));
} catch (Exception e) {
fail("Exception thrown: " + e.getMessage());
}
@@ -257,7 +258,7 @@ public class RFC822ParserTest extends TikaTest {
assertEquals("Air Permit Programs | Air & Radiation | US EPA",
metadata.get(TikaCoreProperties.TITLE));
assertEquals("Air Permit Programs | Air & Radiation | US EPA",
- metadata.get(Metadata.SUBJECT));
+ metadata.get(TikaCoreProperties.SUBJECT));
}
/**
@@ -313,7 +314,7 @@ public class RFC822ParserTest extends TikaTest {
assertEquals("abc", metadata.getValues(Metadata.MESSAGE_TO)[0]);
assertEquals("def", metadata.getValues(Metadata.MESSAGE_TO)[1]);
assertEquals("abcd", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("abcd", metadata.get(Metadata.SUBJECT));
+ assertEquals("abcd", metadata.get(TikaCoreProperties.SUBJECT));
assertContains("bar biz bat", handler.toString());
}
@@ -551,7 +552,7 @@ public class RFC822ParserTest extends TikaTest {
assertEquals(4, metadataList.size());
assertEquals("text/plain; charset=UTF-8", metadataList.get(1).get(Metadata.CONTENT_TYPE));
assertEquals("image/png", metadataList.get(2).get(Metadata.CONTENT_TYPE));
- assertEquals("testPNG.png", metadataList.get(2).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("testPNG.png", metadataList.get(2).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertContains("This email has a PNG attachment included in it", metadataList.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
assertEquals(null, metadataList.get(1).get(Metadata.CONTENT_DISPOSITION));
assertEquals("attachment; filename=\"testPNG.png\"", metadataList.get(2).get(Metadata.CONTENT_DISPOSITION));
@@ -570,7 +571,7 @@ public class RFC822ParserTest extends TikaTest {
assertEquals("text/html; charset=UTF-8", seenMetadata.get(2).get(Metadata.CONTENT_TYPE));
assertEquals("UTF-8", seenMetadata.get(2).get(Metadata.CONTENT_ENCODING));
assertEquals("attachment; filename=\"logo.gif\"", seenMetadata.get(3).get(Metadata.CONTENT_DISPOSITION));
- assertEquals("logo.gif", seenMetadata.get(3).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("logo.gif", seenMetadata.get(3).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals("image/gif", seenMetadata.get(3).get(Metadata.CONTENT_TYPE));
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
index f1650a9..1956e58 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
@@ -99,8 +99,7 @@ public class MboxParserTest extends TikaTest {
assertEquals("2009-06-10T03:58:45Z", mailMetadata.get(TikaCoreProperties.CREATED));
assertEquals("<au...@domain.com>", mailMetadata.get(TikaCoreProperties.CREATOR));
- assertEquals("subject", mailMetadata.get(Metadata.SUBJECT));
- assertEquals("<au...@domain.com>", mailMetadata.get(Metadata.AUTHOR));
+ assertEquals("subject", mailMetadata.get(TikaCoreProperties.SUBJECT));
assertEquals("message/rfc822", mailMetadata.get(Metadata.CONTENT_TYPE));
assertEquals("author@domain.com", mailMetadata.get("Message-From"));
assertEquals("<na...@domain.com>", mailMetadata.get("MboxParser-return-path"));
@@ -146,9 +145,8 @@ public class MboxParserTest extends TikaTest {
assertEquals("Nb. Of mails", 3, mboxParser.getTrackingMetadata().size());
Metadata firstMail = mboxParser.getTrackingMetadata().get(0);
- assertEquals("Re: question about when shuffle/sort start working", firstMail.get(Metadata.SUBJECT));
+ assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.SUBJECT));
assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.TITLE));
- assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(Metadata.AUTHOR));
assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(TikaCoreProperties.CREATOR));
assertEquals("core-user@hadoop.apache.org", firstMail.get(Metadata.MESSAGE_RECIPIENT_ADDRESS));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index 98f9259..7d2b1e2 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -62,15 +62,12 @@ public class ExcelParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
// Mon Oct 01 17:13:56 BST 2007
assertEquals("2007-10-01T16:13:56Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2007-10-01T16:13:56Z", metadata.get(Metadata.CREATION_DATE));
// Mon Oct 01 17:31:43 BST 2007
assertEquals("2007-10-01T16:31:43Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2007-10-01T16:31:43Z", metadata.get(Metadata.DATE));
String content = handler.toString();
assertContains("Sample Excel Worksheet", content);
@@ -280,7 +277,7 @@ public class ExcelParserTest extends TikaTest {
// First try detection of Excel 5
m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "excel_5.xls");
try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls")) {
type = detector.detect(input, m);
assertEquals("application/vnd.ms-excel", type.toString());
@@ -288,7 +285,7 @@ public class ExcelParserTest extends TikaTest {
// Now Excel 95
m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "excel_95.xls");
try (InputStream input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls")) {
type = detector.detect(input, m);
assertEquals("application/vnd.ms-excel", type.toString());
@@ -392,7 +389,6 @@ public class ExcelParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Internal spreadsheet", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Aeham Abushwashi", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Aeham Abushwashi", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("John Smith1", content);
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
index fcf601c..0cde57f 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
@@ -65,7 +65,7 @@ public class OldExcelParserTest extends TikaTest {
// But no other metadata
assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
- assertEquals(null, metadata.get(Metadata.SUBJECT));
+ assertEquals(null, metadata.get(TikaCoreProperties.SUBJECT));
}
/**
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
index acfb754..6e2e4b2 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
@@ -73,9 +73,6 @@ public class OutlookParserTest extends TikaTest {
assertEquals(
"L'\u00C9quipe Microsoft Outlook Express",
metadata.get(TikaCoreProperties.CREATOR));
- assertEquals(
- "L'\u00C9quipe Microsoft Outlook Express",
- metadata.get(Metadata.AUTHOR));
//ensure that "raw" header is correctly decoded
assertEquals(
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
index b3a230f..d95333d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/POIContainerExtractionTest.java
@@ -25,7 +25,7 @@ import java.util.List;
import org.apache.tika.extractor.ContainerExtractor;
import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.junit.Test;
@@ -353,13 +353,13 @@ public class POIContainerExtractionTest extends AbstractPOIContainerExtractionTe
List<Metadata> list = getRecursiveMetadata("testWORD_embeded.doc");
//.docx
assertEquals("{F4754C9B-64F5-4B40-8AF4-679732AC0607}",
- list.get(10).get(TikaMetadataKeys.EMBEDDED_STORAGE_CLASS_ID));
+ list.get(10).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
//_1345471035.ppt
assertEquals("{64818D10-4F9B-11CF-86EA-00AA00B929E8}",
- list.get(14).get(TikaMetadataKeys.EMBEDDED_STORAGE_CLASS_ID));
+ list.get(14).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
//_1345470949.xls
assertEquals("{00020820-0000-0000-C000-000000000046}",
- list.get(16).get(TikaMetadataKeys.EMBEDDED_STORAGE_CLASS_ID));
+ list.get(16).get(TikaCoreProperties.EMBEDDED_STORAGE_CLASS_ID));
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
index d2fb110..68df743 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
@@ -19,6 +19,7 @@ package org.apache.tika.parser.microsoft;
import static org.junit.Assert.assertEquals;
import java.io.InputStream;
+import java.util.Arrays;
import java.util.List;
import java.util.Locale;
@@ -49,7 +50,6 @@ public class PowerPointParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Powerpoint Slide", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Sample Powerpoint Slide", content);
assertContains("Powerpoint X for Mac", content);
@@ -97,16 +97,20 @@ public class PowerPointParserTest extends TikaTest {
assertContains("Row " + row + " Col " + col, xml);
}
}
+
assertContains("Keyword1 Keyword2", xml);
assertEquals("Keyword1 Keyword2",
- metadata.get(TikaCoreProperties.KEYWORDS));
+ metadata.get(Office.KEYWORDS));
+ assertContains("Keyword1 Keyword2",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
+
assertContains("Subject is here", xml);
+ assertContains("Subject is here",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
- // TODO: Remove subject in Tika 2.0
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
+
assertContains("Suddenly some Japanese text:", xml);
// Special version of (GHQ)
@@ -201,11 +205,8 @@ public class PowerPointParserTest extends TikaTest {
assertEquals("application/vnd.ms-powerpoint", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("JOUVIN ETIENNE", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("EJ04325S", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("EJ04325S", metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-08-22T13:32:58Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2011-08-22T13:32:58Z", metadata.get(Metadata.DATE));
assertEquals("2011-08-22T13:30:53Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-08-22T13:30:53Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("1", metadata.get(Office.SLIDE_COUNT));
assertEquals("3", metadata.get(Office.WORD_COUNT));
assertEquals("Test extraction properties pptx", metadata.get(TikaCoreProperties.TITLE));
@@ -246,9 +247,9 @@ public class PowerPointParserTest extends TikaTest {
public void testEmbeddedPDF() throws Exception {
List<Metadata> metadataList = getRecursiveMetadata("testPPT_EmbeddedPDF.ppt");
assertContains("Apache Tika project", metadataList.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("3.pdf", metadataList.get(1).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("3.pdf", metadataList.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertContains("Hello World", metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("4.pdf", metadataList.get(2).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("4.pdf", metadataList.get(2).get(TikaCoreProperties.RESOURCE_NAME_KEY));
}
@Test
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
index 049b211..3753706 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java
@@ -64,10 +64,9 @@ public class ProjectParserTest {
assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));
assertEquals("Nevin Nollop", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("Pangram, fox, dog", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("Pangram, fox, dog", metadata.get(TikaCoreProperties.SUBJECT));
assertEquals("Comment Vulpes vulpes comment", metadata.get(TikaCoreProperties.COMMENTS));
assertEquals("Category1", metadata.get(OfficeOpenXMLCore.CATEGORY));
@@ -75,9 +74,7 @@ public class ProjectParserTest {
assertEquals("CompanyA", metadata.get(OfficeOpenXMLExtended.COMPANY));
assertEquals("2011-11-24T10:58:00Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-11-24T10:58:00Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2011-11-24T11:31:00Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2011-11-24T11:31:00Z", metadata.get(Metadata.DATE));
// Custom Project metadata is present with prefix
assertEquals("0%", metadata.get("custom:% Complete"));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PublisherParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PublisherParserTest.java
index a37e44d..47d59c8 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PublisherParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PublisherParserTest.java
@@ -43,7 +43,6 @@ public class PublisherParserTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
assertEquals("Nick Burch", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Nick Burch", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("0123456789", content);
assertContains("abcdef", content);
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
index 8062555..b1a213a 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/TNEFParserTest.java
@@ -59,7 +59,7 @@ public class TNEFParserTest extends AbstractPOIContainerExtractionTest {
tnef.parse(stream, handler, metadata, new ParseContext());
assertEquals("This is a test message", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("This is a test message", metadata.get(Metadata.SUBJECT));
+ assertEquals("This is a test message", metadata.get(TikaCoreProperties.SUBJECT));
}
/**
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
index 31bd8ba..7456ac4 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
@@ -59,7 +59,6 @@ public class WordParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
assertContains("Sample Word Document", handler.toString());
}
}
@@ -94,7 +93,6 @@ public class WordParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
assertTrue(xml.contains("Sample Word Document"));
// Check that custom headings came through
@@ -190,9 +188,7 @@ public class WordParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("The quick brown fox jumps over the lazy dog", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Gym class featuring a brown fox and lazy dog", metadata.get(Metadata.SUBJECT));
assertEquals("Nevin Nollop", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Nevin Nollop", metadata.get(Metadata.AUTHOR));
assertContains("The quick brown fox jumps over the lazy dog", handler.toString());
}
}
@@ -280,12 +276,10 @@ public class WordParserTest extends TikaTest {
assertContains("Keyword1 Keyword2", content);
assertEquals("Keyword1 Keyword2",
- metadata.get(TikaCoreProperties.KEYWORDS));
+ metadata.get(TikaCoreProperties.SUBJECT));
assertContains("Subject is here", content);
- // TODO: Move to OO subject in Tika 2.0
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
+
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
@@ -346,20 +340,16 @@ public class WordParserTest extends TikaTest {
assertEquals("application/msword", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("EJ04325S", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Etienne Jouvin", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("Etienne Jouvin", metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2012-01-03T22:14:00Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2012-01-03T22:14:00Z", metadata.get(Metadata.DATE));
assertEquals("2010-10-05T09:03:00Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2010-10-05T09:03:00Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
assertEquals("1", metadata.get(Office.PAGE_COUNT));
assertEquals("2", metadata.get(Office.WORD_COUNT));
assertEquals("My Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("My Keyword", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("My Keyword", metadata.get(TikaCoreProperties.SUBJECT));
assertEquals("Normal.dotm", metadata.get(OfficeOpenXMLExtended.TEMPLATE));
assertEquals("My Comments", metadata.get(TikaCoreProperties.COMMENTS));
// TODO: Move to OO subject in Tika 2.0
- assertEquals("My subject", metadata.get(Metadata.SUBJECT));
assertEquals("My subject", metadata.get(OfficeOpenXMLCore.SUBJECT));
assertEquals("EDF-DIT", metadata.get(OfficeOpenXMLExtended.COMPANY));
assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 847d347..8529892 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -55,7 +55,6 @@ import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
@@ -110,7 +109,6 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Sample Excel Worksheet", content);
@@ -119,7 +117,7 @@ public class OOXMLParserTest extends TikaTest {
assertNotContained("9.0", content);
assertContains("196", content);
assertNotContained("196.0", content);
- assertEquals("false", metadata.get(TikaMetadataKeys.PROTECTED));
+ assertEquals("false", metadata.get(TikaCoreProperties.PROTECTED));
}
}
@@ -220,7 +218,7 @@ public class OOXMLParserTest extends TikaTest {
assertNotContained("10.0", content);
assertContains("cb=sum", content);
assertNotContained("13.0", content);
- assertEquals("false", metadata.get(TikaMetadataKeys.PROTECTED));
+ assertEquals("false", metadata.get(TikaCoreProperties.PROTECTED));
}
}
@@ -262,7 +260,6 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Rajiv", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
// Theme files don't have the text in them
@@ -331,7 +328,6 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Rajiv", metadata.get(Metadata.AUTHOR));
}
@@ -362,7 +358,7 @@ public class OOXMLParserTest extends TikaTest {
Parser parser = new AutoDetectParser();
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
ContentHandler handler = new BodyContentHandler();
ParseContext context = new ParseContext();
@@ -398,7 +394,6 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
assertTrue(handler.toString().contains("Sample Word Document"));
}
}
@@ -437,7 +432,6 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
assertTrue(xml.contains("Sample Word Document"));
// Check that custom headings came through
@@ -551,7 +545,7 @@ public class OOXMLParserTest extends TikaTest {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("true", metadata.get(TikaMetadataKeys.PROTECTED));
+ assertEquals("true", metadata.get(TikaCoreProperties.PROTECTED));
}
}
@@ -574,7 +568,7 @@ public class OOXMLParserTest extends TikaTest {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("true", metadata.get(TikaMetadataKeys.PROTECTED));
+ assertEquals("true", metadata.get(TikaCoreProperties.PROTECTED));
String content = handler.toString();
assertContains("Office", content);
@@ -679,15 +673,18 @@ public class OOXMLParserTest extends TikaTest {
assertContains("Keyword1 Keyword2", content);
assertEquals("Keyword1 Keyword2",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(Office.KEYWORDS));
+ assertContains("Keyword1 Keyword2",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
+
assertContains("Subject is here", content);
- // TODO: Remove subject in Tika 2.0
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
+ assertContains("Subject is here",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
+
assertContains("Suddenly some Japanese text:", content);
// Special version of (GHQ)
assertContains("\uff08\uff27\uff28\uff31\uff09", content);
@@ -750,15 +747,18 @@ public class OOXMLParserTest extends TikaTest {
assertContains("Keyword1 Keyword2", xml);
assertEquals("Keyword1 Keyword2",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(Office.KEYWORDS));
assertContains("Subject is here", xml);
- // TODO: Remove subject in Tika 2.0
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
+ assertContains("Keyword1 Keyword2",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
+ assertContains("Subject is here",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
+
+
assertContains("Suddenly some Japanese text:", xml);
// Special version of (GHQ)
assertContains("\uff08\uff27\uff28\uff31\uff09", xml);
@@ -855,11 +855,7 @@ public class OOXMLParserTest extends TikaTest {
assertEquals(null, metadata.get(TikaCoreProperties.CREATOR));
assertEquals(null, metadata.get(TikaCoreProperties.MODIFIER));
assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2006-09-12T15:06:44Z", metadata.get(Metadata.CREATION_DATE));
- assertEquals("2011-08-22T14:24:38Z", metadata.get(Metadata.LAST_MODIFIED));
assertEquals("2011-08-22T14:24:38Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2011-08-22T14:24:38Z", metadata.get(Metadata.DATE));
- assertEquals("Microsoft Excel", metadata.get(Metadata.APPLICATION_NAME));
assertEquals("Microsoft Excel", metadata.get(OfficeOpenXMLExtended.APPLICATION));
assertEquals("true", metadata.get("custom:myCustomBoolean"));
assertEquals("3", metadata.get("custom:myCustomNumber"));
@@ -885,21 +881,17 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("EJ04325S", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Etienne Jouvin", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("Etienne Jouvin", metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-07-29T16:52:00Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-07-29T16:52:00Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2012-01-03T22:14:00Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2012-01-03T22:14:00Z", metadata.get(Metadata.DATE));
- assertEquals("Microsoft Office Word", metadata.get(Metadata.APPLICATION_NAME));
assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
assertEquals("1", metadata.get(Office.PAGE_COUNT));
assertEquals("2", metadata.get(Office.WORD_COUNT));
assertEquals("My Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("My Keyword", metadata.get(TikaCoreProperties.KEYWORDS));
- assertEquals("Normal.dotm", metadata.get(Metadata.TEMPLATE));
+ assertEquals("My Keyword", metadata.get(Office.KEYWORDS));
+ assertContains("My Keyword",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
+
assertEquals("Normal.dotm", metadata.get(OfficeOpenXMLExtended.TEMPLATE));
- // TODO: Remove subject in Tika 2.0
- assertEquals("My subject", metadata.get(Metadata.SUBJECT));
assertEquals("My subject", metadata.get(OfficeOpenXMLCore.SUBJECT));
assertEquals("EDF-DIT", metadata.get(TikaCoreProperties.PUBLISHER));
assertEquals("true", metadata.get("custom:myCustomBoolean"));
@@ -926,11 +918,8 @@ public class OOXMLParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("JOUVIN ETIENNE", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("EJ04325S", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("EJ04325S", metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-08-22T13:30:53Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-08-22T13:30:53Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2011-08-22T13:32:49Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2011-08-22T13:32:49Z", metadata.get(Metadata.DATE));
assertEquals("1", metadata.get(Office.SLIDE_COUNT));
assertEquals("3", metadata.get(Office.WORD_COUNT));
assertEquals("Test extraction properties pptx", metadata.get(TikaCoreProperties.TITLE));
@@ -1286,7 +1275,6 @@ public class OOXMLParserTest extends TikaTest {
xml.metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Internal spreadsheet", xml.metadata.get(TikaCoreProperties.TITLE));
assertEquals("Aeham Abushwashi", xml.metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Aeham Abushwashi", xml.metadata.get(Metadata.AUTHOR));
String content = xml.xml;
assertContains("John Smith1", content);
@@ -1562,7 +1550,7 @@ public class OOXMLParserTest extends TikaTest {
AutoDetectParser parser = new AutoDetectParser();
Metadata m = new Metadata();
- m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
+ m.add(TikaCoreProperties.RESOURCE_NAME_KEY, "excel.xlsb");
// Should be detected correctly
MediaType type;
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
index 8385263..1da0864 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
@@ -33,6 +33,7 @@ import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
@@ -180,7 +181,6 @@ public class SXSLFExtractorTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Rajiv", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
// Theme files don't have the text in them
@@ -249,7 +249,6 @@ public class SXSLFExtractorTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Rajiv", metadata.get(Metadata.AUTHOR));
}
@@ -279,7 +278,7 @@ public class SXSLFExtractorTest extends TikaTest {
Parser parser = new AutoDetectParser();
Metadata metadata = new Metadata();
- metadata.set(Metadata.RESOURCE_NAME_KEY, filename);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
ContentHandler handler = new BodyContentHandler();
try (InputStream input = getResourceAsStream("/test-documents/" + filename)) {
@@ -337,14 +336,9 @@ public class SXSLFExtractorTest extends TikaTest {
assertContains("Keyword1 Keyword2", xml);
assertEquals("Keyword1 Keyword2",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(Office.KEYWORDS));
assertContains("Subject is here", xml);
- // TODO: Remove subject in Tika 2.0
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
- assertEquals("Subject is here",
- metadata.get(OfficeOpenXMLCore.SUBJECT));
assertContains("Suddenly some Japanese text:", xml);
// Special version of (GHQ)
@@ -407,11 +401,8 @@ public class SXSLFExtractorTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("JOUVIN ETIENNE", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("EJ04325S", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("EJ04325S", metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-08-22T13:30:53Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-08-22T13:30:53Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2011-08-22T13:32:49Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2011-08-22T13:32:49Z", metadata.get(Metadata.DATE));
assertEquals("1", metadata.get(Office.SLIDE_COUNT));
assertEquals("3", metadata.get(Office.WORD_COUNT));
assertEquals("Test extraction properties pptx", metadata.get(TikaCoreProperties.TITLE));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
index 063a062..35f5716 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
@@ -30,6 +30,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
@@ -194,7 +195,6 @@ public class SXWPFExtractorTest extends TikaTest {
xmlResult.metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Word Document", xmlResult.metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", xmlResult.metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", xmlResult.metadata.get(Metadata.AUTHOR));
assertTrue(xmlResult.xml.contains("Sample Word Document"));
}
@@ -228,7 +228,6 @@ public class SXWPFExtractorTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Sample Word Document", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
assertTrue(xml.contains("Sample Word Document"));
// Check that custom headings came through
assertTrue(xml.contains("<h1 class=\"title\">"));
@@ -383,12 +382,9 @@ public class SXWPFExtractorTest extends TikaTest {
assertContains("Keyword1 Keyword2", content);
assertEquals("Keyword1 Keyword2",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(Office.KEYWORDS));
assertContains("Subject is here", content);
- // TODO: Remove subject in Tika 2.0
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
@@ -419,21 +415,16 @@ public class SXWPFExtractorTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("EJ04325S", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Etienne Jouvin", metadata.get(TikaCoreProperties.MODIFIER));
- assertEquals("Etienne Jouvin", metadata.get(Metadata.LAST_AUTHOR));
assertEquals("2011-07-29T16:52:00Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-07-29T16:52:00Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2012-01-03T22:14:00Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2012-01-03T22:14:00Z", metadata.get(Metadata.DATE));
- assertEquals("Microsoft Office Word", metadata.get(Metadata.APPLICATION_NAME));
assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
assertEquals("1", metadata.get(Office.PAGE_COUNT));
assertEquals("2", metadata.get(Office.WORD_COUNT));
assertEquals("My Title", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("My Keyword", metadata.get(TikaCoreProperties.KEYWORDS));
- assertEquals("Normal.dotm", metadata.get(Metadata.TEMPLATE));
+ assertEquals("My Keyword", metadata.get(Office.KEYWORDS));
+ assertContains("My Keyword",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
assertEquals("Normal.dotm", metadata.get(OfficeOpenXMLExtended.TEMPLATE));
- // TODO: Remove subject in Tika 2.0
- assertEquals("My subject", metadata.get(Metadata.SUBJECT));
assertEquals("My subject", metadata.get(OfficeOpenXMLCore.SUBJECT));
assertEquals("EDF-DIT", metadata.get(TikaCoreProperties.PUBLISHER));
assertEquals("true", metadata.get("custom:myCustomBoolean"));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
index 915fc05..5cb0de9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/xml/XML2003ParserTest.java
@@ -69,7 +69,7 @@ public class XML2003ParserTest extends TikaTest {
//make sure embedded docs were properly processed
assertContains("moscow-birds",
- Arrays.asList(list.get(7).getValues(TikaCoreProperties.KEYWORDS)));
+ Arrays.asList(list.get(7).getValues(TikaCoreProperties.SUBJECT)));
//check that text is extracted with breaks between elements
String txt = getText(getResourceAsStream("/test-documents/testWORD2003.xml"), new AutoDetectParser());
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
index 1c615f6..ae9d06b 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java
@@ -64,7 +64,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Test Title", content);
@@ -99,7 +98,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
// Check the textual contents
String content = handler.toString();
@@ -153,7 +151,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Test Title", content);
@@ -187,7 +184,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Test Title", content);
@@ -234,7 +230,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Une chason en Fran\u00e7ais", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist \u2468\u2460", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist \u2468\u2460", metadata.get(Metadata.AUTHOR));
assertEquals("Test Artist \u2468\u2460", metadata.get(XMPDM.ARTIST));
assertEquals("Test Album \u2460\u2468", metadata.get(XMPDM.ALBUM));
@@ -272,7 +267,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Test Title", content);
@@ -351,7 +345,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Plus loin vers l'ouest", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Merzhin", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Merzhin", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Plus loin vers l'ouest", content);
@@ -383,7 +376,6 @@ public class Mp3ParserTest {
assertEquals("audio/mpeg", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Girl you have no faith in medicine", metadata.get(TikaCoreProperties.TITLE));
assertEquals("The White Stripes", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("The White Stripes", metadata.get(Metadata.AUTHOR));
String content = handler.toString();
assertContains("Girl you have no faith in medicine", content);
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
index 90476c6..ba3d9fc 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
@@ -56,11 +56,8 @@ public class MP4ParserTest extends TikaTest {
assertEquals("audio/mp4", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Test Title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test Artist", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Test Artist", metadata.get(Metadata.AUTHOR));
assertEquals("2012-01-28T18:39:18Z", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2012-01-28T18:39:18Z", metadata.get(Metadata.CREATION_DATE));
assertEquals("2012-01-28T18:40:25Z", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2012-01-28T18:40:25Z", metadata.get(Metadata.DATE));
// Check the textual contents
String content = handler.toString();
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index b4648d1..da228dd 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -28,6 +28,7 @@ import java.util.regex.Pattern;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DefaultParser;
@@ -237,7 +238,7 @@ public class TesseractOCRParserTest extends TikaTest {
assertEquals("66", m.get(Metadata.IMAGE_LENGTH));
assertEquals("8", m.get(Metadata.BITS_PER_SAMPLE));
assertEquals(null, m.get(Metadata.SAMPLES_PER_PIXEL));
- assertContains("This is a test Apache Tika imag", m.get(Metadata.COMMENTS));
+ assertContains("This is a test Apache Tika imag", m.get(TikaCoreProperties.COMMENTS));
//bmp
m = getXML("testBMP.bmp").metadata;
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
index d1ec46e..7b93271 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/odf/ODFParserTest.java
@@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.InputStream;
+import java.util.Arrays;
import java.util.List;
import org.apache.tika.TikaTest;
@@ -27,6 +28,7 @@ import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
@@ -84,18 +86,15 @@ public class ODFParserTest extends TikaTest {
assertEquals(
"application/vnd.oasis.opendocument.text",
metadata.get(Metadata.CONTENT_TYPE));
- assertEquals("en-US", metadata.get(Metadata.LANGUAGE));
- assertEquals("PT1M7S", metadata.get(Metadata.EDIT_TIME));
+ assertEquals("en-US", metadata.get(TikaCoreProperties.LANGUAGE));
+ assertEquals("PT1M7S", metadata.get(OfficeOpenXMLExtended.TOTAL_TIME));
assertEquals(
"NeoOffice/2.2$Unix OpenOffice.org_project/680m18$Build-9161",
metadata.get("generator"));
// Check date metadata, both old-style and new-style
assertEquals("2007-09-14T11:07:10", metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.MODIFIED));
- assertEquals("2007-09-14T11:07:10", metadata.get(Metadata.DATE));
assertEquals("2007-09-14T11:06:08", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2007-09-14T11:06:08", metadata.get(Metadata.CREATION_DATE));
// Check the document statistics
assertEquals("1", metadata.get(Office.PAGE_COUNT));
@@ -106,24 +105,6 @@ public class ODFParserTest extends TikaTest {
assertEquals("0", metadata.get(Office.OBJECT_COUNT));
assertEquals("0", metadata.get(Office.IMAGE_COUNT));
- // Check the Tika-1.0 style document statistics
- assertEquals("1", metadata.get(Metadata.PAGE_COUNT));
- assertEquals("1", metadata.get(Metadata.PARAGRAPH_COUNT));
- assertEquals("14", metadata.get(Metadata.WORD_COUNT));
- assertEquals("78", metadata.get(Metadata.CHARACTER_COUNT));
- assertEquals("0", metadata.get(Metadata.TABLE_COUNT));
- assertEquals("0", metadata.get(Metadata.OBJECT_COUNT));
- assertEquals("0", metadata.get(Metadata.IMAGE_COUNT));
-
- // Check the very old style statistics (these will be removed shortly)
- assertEquals("0", metadata.get("nbTab"));
- assertEquals("0", metadata.get("nbObject"));
- assertEquals("0", metadata.get("nbImg"));
- assertEquals("1", metadata.get("nbPage"));
- assertEquals("1", metadata.get("nbPara"));
- assertEquals("14", metadata.get("nbWord"));
- assertEquals("78", metadata.get("nbCharacter"));
-
// Custom metadata tags present but without values
assertEquals(null, metadata.get("custom:Info 1"));
assertEquals(null, metadata.get("custom:Info 2"));
@@ -154,21 +135,19 @@ public class ODFParserTest extends TikaTest {
"application/vnd.oasis.opendocument.formula",
metadata.get(Metadata.CONTENT_TYPE));
assertEquals(null, metadata.get(TikaCoreProperties.MODIFIED));
- assertEquals("2006-01-27T11:55:22", metadata.get(Metadata.CREATION_DATE));
+ assertEquals("2006-01-27T11:55:22", metadata.get(TikaCoreProperties.CREATED));
assertEquals("The quick brown fox jumps over the lazy dog",
metadata.get(TikaCoreProperties.TITLE));
assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Gym class featuring a brown fox and lazy dog",
metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Gym class featuring a brown fox and lazy dog",
- metadata.get(Metadata.SUBJECT));
- assertEquals("PT0S", metadata.get(Metadata.EDIT_TIME));
+ assertContains("Gym class featuring a brown fox and lazy dog",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
+ assertEquals("PT0S", metadata.get(OfficeOpenXMLExtended.TOTAL_TIME));
assertEquals("1", metadata.get("editing-cycles"));
assertEquals(
"OpenOffice.org/2.2$Win32 OpenOffice.org_project/680m14$Build-9134",
metadata.get("generator"));
- assertEquals("Pangram, fox, dog", metadata.get(Metadata.KEYWORDS));
+ assertEquals("Pangram, fox, dog", metadata.get(TikaCoreProperties.SUBJECT));
// User defined metadata
assertEquals("Text 1", metadata.get("custom:Info 1"));
@@ -177,13 +156,13 @@ public class ODFParserTest extends TikaTest {
assertEquals("true", metadata.get("custom:Info 4"));
// No statistics present
- assertEquals(null, metadata.get(Metadata.PAGE_COUNT));
- assertEquals(null, metadata.get(Metadata.PARAGRAPH_COUNT));
- assertEquals(null, metadata.get(Metadata.WORD_COUNT));
- assertEquals(null, metadata.get(Metadata.CHARACTER_COUNT));
- assertEquals(null, metadata.get(Metadata.TABLE_COUNT));
- assertEquals(null, metadata.get(Metadata.OBJECT_COUNT));
- assertEquals(null, metadata.get(Metadata.IMAGE_COUNT));
+ assertEquals(null, metadata.get(Office.PAGE_COUNT));
+ assertEquals(null, metadata.get(Office.PARAGRAPH_COUNT));
+ assertEquals(null, metadata.get(Office.WORD_COUNT));
+ assertEquals(null, metadata.get(Office.CHARACTER_COUNT));
+ assertEquals(null, metadata.get(Office.TABLE_COUNT));
+ assertEquals(null, metadata.get(Office.OBJECT_COUNT));
+ assertEquals(null, metadata.get(Office.IMAGE_COUNT));
assertEquals(null, metadata.get("nbTab"));
assertEquals(null, metadata.get("nbObject"));
assertEquals(null, metadata.get("nbImg"));
@@ -214,19 +193,19 @@ public class ODFParserTest extends TikaTest {
metadata.get(Metadata.CONTENT_TYPE));
assertEquals("2009-10-05T21:22:38", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("2009-10-05T19:04:01", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2009-10-05T19:04:01", metadata.get(Metadata.CREATION_DATE));
+ assertEquals("2009-10-05T19:04:01", metadata.get(TikaCoreProperties.CREATED));
assertEquals("Apache Tika", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Test document", metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Test document", metadata.get(Metadata.SUBJECT));
+ assertContains("Test document",
+ Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
assertEquals("A rather complex document", metadata.get(TikaCoreProperties.DESCRIPTION));
assertEquals("Bart Hanssens", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Bart Hanssens", metadata.get("initial-creator"));
assertEquals("2", metadata.get("editing-cycles"));
- assertEquals("PT02H03M24S", metadata.get(Metadata.EDIT_TIME));
+ assertEquals("PT02H03M24S", metadata.get(OfficeOpenXMLExtended.TOTAL_TIME));
assertEquals(
"OpenOffice.org/3.1$Unix OpenOffice.org_project/310m19$Build-9420",
metadata.get("generator"));
- assertEquals("Apache, Lucene, Tika", metadata.get(Metadata.KEYWORDS));
+ assertEquals("Apache, Lucene, Tika", metadata.get(TikaCoreProperties.SUBJECT));
// User defined metadata
assertEquals("Bart Hanssens", metadata.get("custom:Editor"));
@@ -243,24 +222,6 @@ public class ODFParserTest extends TikaTest {
assertEquals("2", metadata.get(Office.OBJECT_COUNT));
assertEquals("0", metadata.get(Office.IMAGE_COUNT));
- // Check the Tika-1.0 style document statistics
- assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
- assertEquals("13", metadata.get(Metadata.PARAGRAPH_COUNT));
- assertEquals("54", metadata.get(Metadata.WORD_COUNT));
- assertEquals("351", metadata.get(Metadata.CHARACTER_COUNT));
- assertEquals("0", metadata.get(Metadata.TABLE_COUNT));
- assertEquals("2", metadata.get(Metadata.OBJECT_COUNT));
- assertEquals("0", metadata.get(Metadata.IMAGE_COUNT));
-
- // Check the old style statistics (these will be removed shortly)
- assertEquals("0", metadata.get("nbTab"));
- assertEquals("2", metadata.get("nbObject"));
- assertEquals("0", metadata.get("nbImg"));
- assertEquals("2", metadata.get("nbPage"));
- assertEquals("13", metadata.get("nbPara"));
- assertEquals("54", metadata.get("nbWord"));
- assertEquals("351", metadata.get("nbCharacter"));
-
String content = handler.toString();
assertTrue(content.contains(
"Apache Tika Tika is part of the Lucene project."
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 52814ac..93966e4 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -25,6 +25,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.InputStream;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -46,6 +47,7 @@ import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.PDF;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -125,7 +127,6 @@ public class PDFParserTest extends TikaTest {
String xml = r.xml;
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Bertrand Delacr\u00e9taz", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Bertrand Delacr\u00e9taz", metadata.get(Metadata.AUTHOR));
assertEquals("Firefox", metadata.get(TikaCoreProperties.CREATOR_TOOL));
assertEquals("Apache Tika - Apache Tika", metadata.get(TikaCoreProperties.TITLE));
@@ -161,7 +162,6 @@ public class PDFParserTest extends TikaTest {
Metadata metadata = r.metadata;
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Document author", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("Document author", metadata.get(Metadata.AUTHOR));
assertEquals("Document title", metadata.get(TikaCoreProperties.TITLE));
assertEquals("Custom Value", metadata.get("Custom Property"));
@@ -186,9 +186,7 @@ public class PDFParserTest extends TikaTest {
assertEquals("true", metadata.get("pdf:encrypted"));
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("The Bank of England", metadata.get(Metadata.AUTHOR));
assertEquals("Speeches by Andrew G Haldane", metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Speeches by Andrew G Haldane", metadata.get(Metadata.SUBJECT));
assertEquals("Rethinking the Financial Network, Speech by Andrew G Haldane, Executive Director, Financial Stability delivered at the Financial Student Association, Amsterdam on 28 April 2009", metadata.get(TikaCoreProperties.TITLE));
assertContains("RETHINKING THE FINANCIAL NETWORK", r.xml);
@@ -210,7 +208,6 @@ public class PDFParserTest extends TikaTest {
assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR));
assertEquals("Speeches by Andrew G Haldane", metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Speeches by Andrew G Haldane", metadata.get(Metadata.SUBJECT));
assertEquals("Rethinking the Financial Network, Speech by Andrew G Haldane, Executive Director, Financial Stability delivered at the Financial Student Association, Amsterdam on 28 April 2009", metadata.get(TikaCoreProperties.TITLE));
assertContains("RETHINKING THE FINANCIAL NETWORK", r.xml);
@@ -300,13 +297,11 @@ public class PDFParserTest extends TikaTest {
assertContains("Keyword1 Keyword2", content);
assertEquals("Keyword1 Keyword2",
- metadata.get(Metadata.KEYWORDS));
+ metadata.get(Office.KEYWORDS));
assertContains("Subject is here", content);
assertEquals("Subject is here",
metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Subject is here",
- metadata.get(Metadata.SUBJECT));
assertContains("Suddenly some Japanese text:", content);
// Special version of (GHQ)
@@ -643,10 +638,10 @@ public class PDFParserTest extends TikaTest {
List<Metadata> metadatas = p.getMetadata();
assertEquals(5, metadatas.size());
- assertNull(metadatas.get(0).get(Metadata.RESOURCE_NAME_KEY));
- assertEquals("image0.jpg", metadatas.get(1).get(Metadata.RESOURCE_NAME_KEY));
- assertEquals("Press Quality(1).joboptions", metadatas.get(3).get(Metadata.RESOURCE_NAME_KEY));
- assertEquals("Unit10.doc", metadatas.get(4).get(Metadata.RESOURCE_NAME_KEY));
+ assertNull(metadatas.get(0).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ assertEquals("image0.jpg", metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ assertEquals("Press Quality(1).joboptions", metadatas.get(3).get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ assertEquals("Unit10.doc", metadatas.get(4).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals(MediaType.image("jpeg").toString(), metadatas.get(1).get(Metadata.CONTENT_TYPE));
assertEquals(MediaType.image("tiff").toString(), metadatas.get(2).get(Metadata.CONTENT_TYPE));
assertEquals("text/plain; charset=ISO-8859-1", metadatas.get(3).get(Metadata.CONTENT_TYPE));
@@ -675,9 +670,9 @@ public class PDFParserTest extends TikaTest {
assertEquals("Invalid height.", "91", metadatas.get(1).get("height"));
assertEquals("Invalid width.", "352", metadatas.get(1).get("width"));
- assertNull(metadatas.get(0).get(Metadata.RESOURCE_NAME_KEY));
+ assertNull(metadatas.get(0).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals("image0.jb2",
- metadatas.get(1).get(Metadata.RESOURCE_NAME_KEY));
+ metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertEquals(MediaType.image("x-jbig2").toString(),
metadatas.get(1).get(Metadata.CONTENT_TYPE));
}
@@ -774,22 +769,10 @@ public class PDFParserTest extends TikaTest {
public void testMultipleAuthors() throws Exception {
XMLResult r = getXML("testPDF_twoAuthors.pdf");
- String[] keys = new String[]{
- "dc:creator",
- "meta:author",
- "creator",
- "Author"
- };
+ List<String> authors = Arrays.asList(r.metadata.getValues(TikaCoreProperties.CREATOR));
+ assertContains("Sample Author 1", authors);
+ assertContains("Sample Author 2", authors);
- for (String k : keys) {
- String[] vals = r.metadata.getValues(k);
- assertEquals("number of authors == 2 for key: " + k, 2, vals.length);
- Set<String> set = new HashSet<String>();
- set.add(vals[0]);
- set.add(vals[1]);
- assertTrue("Sample Author 1", set.contains("Sample Author 1"));
- assertTrue("Sample Author 2", set.contains("Sample Author 2"));
- }
}
//STUB test for once TIKA-1295 is fixed
@@ -902,7 +885,7 @@ public class PDFParserTest extends TikaTest {
List<Metadata> metadatas = getRecursiveMetadata("testPDF_multiFormatEmbFiles.pdf");
assertEquals("metadata size", 5, metadatas.size());
Metadata firstAttachment = metadatas.get(1);
- assertEquals("attachment file name", "Test.txt", firstAttachment.get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("attachment file name", "Test.txt", firstAttachment.get(TikaCoreProperties.RESOURCE_NAME_KEY));
}
@Test //TIKA-1374
@@ -910,13 +893,13 @@ public class PDFParserTest extends TikaTest {
List<Metadata> metadatas = getRecursiveMetadata("testPDF_multiFormatEmbFiles.pdf");
assertEquals("metadata size", 5, metadatas.size());
- assertEquals("file name", "Test.txt", metadatas.get(1).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("file name", "Test.txt", metadatas.get(1).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertContains("os specific", metadatas.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("file name", "TestMac.txt", metadatas.get(2).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("file name", "TestMac.txt", metadatas.get(2).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertContains("mac embedded", metadatas.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("file name", "TestDos.txt", metadatas.get(3).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("file name", "TestDos.txt", metadatas.get(3).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertContains("dos embedded", metadatas.get(3).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("file name", "TestUnix.txt", metadatas.get(4).get(Metadata.RESOURCE_NAME_KEY));
+ assertEquals("file name", "TestUnix.txt", metadatas.get(4).get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertContains("unix embedded", metadatas.get(4).get(RecursiveParserWrapper.TIKA_CONTENT));
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/AbstractPkgTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/AbstractPkgTest.java
index 6fad531..a28f93d 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/AbstractPkgTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/AbstractPkgTest.java
@@ -80,7 +80,7 @@ public abstract class AbstractPkgTest extends TikaTest {
public void parse(InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context) throws IOException,
SAXException, TikaException {
- filenames.add(metadata.get(Metadata.RESOURCE_NAME_KEY));
+ filenames.add(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
mediatypes.add(metadata.get(Metadata.CONTENT_TYPE));
createdAts.add(metadata.get(TikaCoreProperties.CREATED));
modifiedAts.add(metadata.get(TikaCoreProperties.MODIFIED));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
index be0ff9f..9fc0e81 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
@@ -143,7 +143,7 @@ public class ZipParserTest extends AbstractPkgTest {
private class GatherRelIDsDocumentExtractor implements EmbeddedDocumentExtractor {
public Set<String> allRelIDs = new HashSet<String>();
public boolean shouldParseEmbedded(Metadata metadata) {
- String relID = metadata.get(Metadata.EMBEDDED_RELATIONSHIP_ID);
+ String relID = metadata.get(TikaCoreProperties.EMBEDDED_RELATIONSHIP_ID);
if (relID != null) {
allRelIDs.add(relID);
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
index bc986c3..53e95a6 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/prt/PRTParserTest.java
@@ -43,8 +43,7 @@ public class PRTParserTest extends TikaTest {
// This file has a date
assertEquals("2011-06-20T16:54:00",
metadata.get(TikaCoreProperties.CREATED));
- assertEquals("2011-06-20T16:54:00",
- metadata.get(Metadata.CREATION_DATE));
+
// But no description
assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
@@ -78,9 +77,7 @@ public class PRTParserTest extends TikaTest {
// File has both a date and a description
assertEquals("1997-04-01T08:59:00",
- metadata.get(Metadata.DATE));
- assertEquals("1997-04-01T08:59:00",
- metadata.get(Metadata.CREATION_DATE));
+ metadata.get(TikaCoreProperties.CREATED));
assertEquals("TIKA TEST PART DESCRIPTION INFORMATION\r\n",
metadata.get(TikaCoreProperties.DESCRIPTION));
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
index 8a53153..20ad4f9 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
@@ -181,7 +181,6 @@ public class RTFParserTest extends TikaTest {
assertEquals("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f\u3000",
r.metadata.get(TikaCoreProperties.TITLE));
assertEquals("VMazel", r.metadata.get(TikaCoreProperties.CREATOR));
- assertEquals("VMazel", r.metadata.get(Metadata.AUTHOR));
assertEquals("StarWriter", r.metadata.get(TikaCoreProperties.COMMENTS));
// Special version of (GHQ)
@@ -290,14 +289,12 @@ public class RTFParserTest extends TikaTest {
}
assertContains("Keyword1 Keyword2", content);
- assertEquals("Keyword1 Keyword2",
- r.metadata.get(TikaCoreProperties.KEYWORDS));
+ assertContains("Keyword1 Keyword2",
+ Arrays.asList(r.metadata.getValues(Office.KEYWORDS)));
assertContains("Subject is here", content);
assertEquals("Subject is here",
r.metadata.get(OfficeOpenXMLCore.SUBJECT));
- assertEquals("Subject is here",
- r.metadata.get(Metadata.SUBJECT));
assertContains("Suddenly some Japanese text:", content);
// Special version of (GHQ)
@@ -439,7 +436,7 @@ public class RTFParserTest extends TikaTest {
for (Map.Entry<Integer, Pair> e : expected.entrySet()) {
Metadata metadata = metadataList.get(e.getKey());
Pair p = e.getValue();
- assertNotNull(metadata.get(Metadata.RESOURCE_NAME_KEY));
+ assertNotNull(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
//necessary to getName() because MSOffice extractor includes
//directory: _1457338524/HW.txt
assertEquals("filename equals ",
@@ -461,7 +458,7 @@ public class RTFParserTest extends TikaTest {
new BasicContentHandlerFactory(BasicContentHandlerFactory.HANDLER_TYPE.IGNORE, -1));
ContentHandler handler = new BodyContentHandler();
Metadata rootMetadata = new Metadata();
- rootMetadata.add(Metadata.RESOURCE_NAME_KEY, "testRTFRegularImages.rtf");
+ rootMetadata.add(TikaCoreProperties.RESOURCE_NAME_KEY, "testRTFRegularImages.rtf");
try (TikaInputStream tis = TikaInputStream.get(getResourceAsStream("/test-documents/testRTFRegularImages.rtf"))) {
parser.parse(tis, handler, rootMetadata, ctx);
}
@@ -472,15 +469,15 @@ public class RTFParserTest extends TikaTest {
assertTrue(meta_jpg_exif != null);
assertTrue(meta_jpg != null);
- assertTrue(Arrays.asList(meta_jpg_exif.getValues("dc:subject")).contains("serbor"));
- assertTrue(meta_jpg.get("Comments").contains("Licensed to the Apache"));
+ assertTrue(Arrays.asList(meta_jpg_exif.getValues(TikaCoreProperties.SUBJECT)).contains("serbor"));
+ assertTrue(meta_jpg.get(TikaCoreProperties.COMMENTS).contains("Licensed to the Apache"));
//make sure old metadata doesn't linger between objects
- assertFalse(Arrays.asList(meta_jpg.getValues("dc:subject")).contains("serbor"));
+ assertFalse(Arrays.asList(meta_jpg.getValues(TikaCoreProperties.SUBJECT)).contains("serbor"));
assertEquals("false", meta_jpg.get(RTFMetadata.THUMBNAIL));
assertEquals("false", meta_jpg_exif.get(RTFMetadata.THUMBNAIL));
assertEquals(50, meta_jpg.names().length);
- assertEquals(114, meta_jpg_exif.names().length);
+ assertEquals(109, meta_jpg_exif.names().length);
}
@Test
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/solidworks/SolidworksParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/solidworks/SolidworksParserTest.java
index 2d06119..a4be230 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/solidworks/SolidworksParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/solidworks/SolidworksParserTest.java
@@ -48,13 +48,13 @@ public class SolidworksParserTest extends TikaTest {
//Check properties
assertEquals("2012-04-18T10:27:29Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals(null, metadata.get(TikaCoreProperties.CONTRIBUTOR));
- assertEquals("2013-09-06T08:12:12Z", metadata.get(Metadata.MODIFIED));
+ assertEquals("2013-09-06T08:12:12Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("solidworks-dcom_dev", metadata.get(TikaCoreProperties.MODIFIER));
assertEquals(null, metadata.get(TikaCoreProperties.RELATION));
assertEquals(null, metadata.get(TikaCoreProperties.RIGHTS));
assertEquals(null, metadata.get(TikaCoreProperties.SOURCE));
assertEquals("", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("", metadata.get(TikaCoreProperties.SUBJECT));
}
}
@@ -75,13 +75,13 @@ public class SolidworksParserTest extends TikaTest {
//Check properties
assertEquals("2012-04-18T10:27:29Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals(null, metadata.get(TikaCoreProperties.CONTRIBUTOR));
- assertEquals("2013-11-28T12:38:28Z", metadata.get(Metadata.MODIFIED));
+ assertEquals("2013-11-28T12:38:28Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("solidworks-dcom_dev", metadata.get(TikaCoreProperties.MODIFIER));
assertEquals(null, metadata.get(TikaCoreProperties.RELATION));
assertEquals(null, metadata.get(TikaCoreProperties.RIGHTS));
assertEquals(null, metadata.get(TikaCoreProperties.SOURCE));
assertEquals("", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("", metadata.get(TikaCoreProperties.SUBJECT));
}
}
@@ -102,13 +102,13 @@ public class SolidworksParserTest extends TikaTest {
//Check properties
assertEquals("2012-04-25T09:51:38Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals(null, metadata.get(TikaCoreProperties.CONTRIBUTOR));
- assertEquals("2013-09-06T08:11:08Z", metadata.get(Metadata.MODIFIED));
+ assertEquals("2013-09-06T08:11:08Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("solidworks-dcom_dev", metadata.get(TikaCoreProperties.MODIFIER));
assertEquals(null, metadata.get(TikaCoreProperties.RELATION));
assertEquals(null, metadata.get(TikaCoreProperties.RIGHTS));
assertEquals(null, metadata.get(TikaCoreProperties.SOURCE));
assertEquals("", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("", metadata.get(TikaCoreProperties.SUBJECT));
}
}
@@ -130,13 +130,13 @@ public class SolidworksParserTest extends TikaTest {
//Check properties
assertEquals("2012-04-25T09:51:38Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals(null, metadata.get(TikaCoreProperties.CONTRIBUTOR));
- assertEquals("2013-11-28T12:41:49Z", metadata.get(Metadata.MODIFIED));
+ assertEquals("2013-11-28T12:41:49Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("solidworks-dcom_dev", metadata.get(TikaCoreProperties.MODIFIER));
assertEquals(null, metadata.get(TikaCoreProperties.RELATION));
assertEquals(null, metadata.get(TikaCoreProperties.RIGHTS));
assertEquals(null, metadata.get(TikaCoreProperties.SOURCE));
assertEquals("", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("", metadata.get(TikaCoreProperties.SUBJECT));
} finally {
input.close();
}
@@ -159,13 +159,13 @@ public class SolidworksParserTest extends TikaTest {
//Check properties
assertEquals("2012-07-03T12:05:29Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals(null, metadata.get(TikaCoreProperties.CONTRIBUTOR));
- assertEquals("2013-09-06T08:06:57Z", metadata.get(Metadata.MODIFIED));
+ assertEquals("2013-09-06T08:06:57Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("solidworks-dcom_dev", metadata.get(TikaCoreProperties.MODIFIER));
assertEquals(null, metadata.get(TikaCoreProperties.RELATION));
assertEquals(null, metadata.get(TikaCoreProperties.RIGHTS));
assertEquals(null, metadata.get(TikaCoreProperties.SOURCE));
assertEquals("", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("", metadata.get(TikaCoreProperties.SUBJECT));
}
}
@@ -186,13 +186,13 @@ public class SolidworksParserTest extends TikaTest {
//Check properties
assertEquals("2012-07-03T12:05:29Z", metadata.get(TikaCoreProperties.CREATED));
assertEquals(null, metadata.get(TikaCoreProperties.CONTRIBUTOR));
- assertEquals("2013-11-28T12:41:49Z", metadata.get(Metadata.MODIFIED));
+ assertEquals("2013-11-28T12:41:49Z", metadata.get(TikaCoreProperties.MODIFIED));
assertEquals("solidworks-dcom_dev", metadata.get(TikaCoreProperties.MODIFIER));
assertEquals(null, metadata.get(TikaCoreProperties.RELATION));
assertEquals(null, metadata.get(TikaCoreProperties.RIGHTS));
assertEquals(null, metadata.get(TikaCoreProperties.SOURCE));
assertEquals("", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("", metadata.get(TikaCoreProperties.KEYWORDS));
+ assertEquals("", metadata.get(TikaCoreProperties.SUBJECT));
}
}
}
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
index 29fbc1d..b58659e 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java
@@ -48,20 +48,13 @@ public class DcXMLParserTest extends TikaTest {
// The file contains 5 dc:subject tags, which come through as
// a multi-valued Tika Metadata entry in file order
- assertEquals(true, metadata.isMultiValued(TikaCoreProperties.KEYWORDS));
- assertEquals(5, metadata.getValues(TikaCoreProperties.KEYWORDS).length);
- assertEquals("Java", metadata.getValues(TikaCoreProperties.KEYWORDS)[0]);
- assertEquals("XML", metadata.getValues(TikaCoreProperties.KEYWORDS)[1]);
- assertEquals("XSLT", metadata.getValues(TikaCoreProperties.KEYWORDS)[2]);
- assertEquals("JDOM", metadata.getValues(TikaCoreProperties.KEYWORDS)[3]);
- assertEquals("Indexation", metadata.getValues(TikaCoreProperties.KEYWORDS)[4]);
- assertEquals(true, metadata.isMultiValued(Metadata.SUBJECT));
- assertEquals(5, metadata.getValues(Metadata.SUBJECT).length);
- assertEquals("Java", metadata.getValues(Metadata.SUBJECT)[0]);
- assertEquals("XML", metadata.getValues(Metadata.SUBJECT)[1]);
- assertEquals("XSLT", metadata.getValues(Metadata.SUBJECT)[2]);
- assertEquals("JDOM", metadata.getValues(Metadata.SUBJECT)[3]);
- assertEquals("Indexation", metadata.getValues(Metadata.SUBJECT)[4]);
+ assertEquals(true, metadata.isMultiValued(TikaCoreProperties.SUBJECT));
+ assertEquals(5, metadata.getValues(TikaCoreProperties.SUBJECT).length);
+ assertEquals("Java", metadata.getValues(TikaCoreProperties.SUBJECT)[0]);
+ assertEquals("XML", metadata.getValues(TikaCoreProperties.SUBJECT)[1]);
+ assertEquals("XSLT", metadata.getValues(TikaCoreProperties.SUBJECT)[2]);
+ assertEquals("JDOM", metadata.getValues(TikaCoreProperties.SUBJECT)[3]);
+ assertEquals("Indexation", metadata.getValues(TikaCoreProperties.SUBJECT)[4]);
assertEquals(
"Framework d\'indexation des documents XML, HTML, PDF etc..",
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
index 1e66ac7..54b1fec 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java
@@ -23,6 +23,7 @@ import java.io.InputStream;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.TeeContentHandler;
@@ -32,9 +33,9 @@ import org.xml.sax.ContentHandler;
public class EmptyAndDuplicateElementsXMLParserTest extends TikaTest {
private Property FIRST_NAME = Property.internalTextBag(
- "custom" + Metadata.NAMESPACE_PREFIX_DELIMITER + "FirstName");
+ "custom" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "FirstName");
private Property LAST_NAME = Property.internalTextBag(
- "custom" + Metadata.NAMESPACE_PREFIX_DELIMITER + "LastName");
+ "custom" + TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER + "LastName");
@Test
public void testDefaultBehavior() throws Exception {
diff --git a/tika-parsers/src/test/resources/test-documents/testPST.pst b/tika-parsers/src/test/resources/test-documents/testPST.pst
index 005e215..c10a928 100644
Binary files a/tika-parsers/src/test/resources/test-documents/testPST.pst and b/tika-parsers/src/test/resources/test-documents/testPST.pst differ
diff --git a/tika-parsers/src/test/resources/test-documents/testPST_variousBodyTypes.pst b/tika-parsers/src/test/resources/test-documents/testPST_variousBodyTypes.pst
index 5846e05..2a9141a 100644
Binary files a/tika-parsers/src/test/resources/test-documents/testPST_variousBodyTypes.pst and b/tika-parsers/src/test/resources/test-documents/testPST_variousBodyTypes.pst differ
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
index 8d32fa0..8af3ad4 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/DetectorResource.java
@@ -29,6 +29,7 @@ import java.io.InputStream;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -48,7 +49,7 @@ public class DetectorResource {
String filename = TikaResource.detectFilename(httpHeaders
.getRequestHeaders());
LOG.info("Detecting media type for Filename: {}", filename);
- met.add(Metadata.RESOURCE_NAME_KEY, filename);
+ met.add(TikaCoreProperties.RESOURCE_NAME_KEY, filename);
try {
return TikaResource.getConfig().getDetector().detect(tis, met).toString();
} catch (IOException e) {
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
index 77afed9..d019152 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/TikaResource.java
@@ -58,7 +58,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.DigestingParser;
@@ -228,7 +228,7 @@ public class TikaResource {
public static void fillMetadata(Parser parser, Metadata metadata, ParseContext context, MultivaluedMap<String, String> httpHeaders) {
String fileName = detectFilename(httpHeaders);
if (fileName != null) {
- metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, fileName);
+ metadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, fileName);
}
String contentTypeHeader = httpHeaders.getFirst(HttpHeaders.CONTENT_TYPE);
diff --git a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
index 716d171..3189237 100644
--- a/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
+++ b/tika-server/src/main/java/org/apache/tika/server/resource/UnpackerResource.java
@@ -51,7 +51,7 @@ import org.apache.poi.util.IOUtils;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaMetadataKeys;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.ParseContext;
@@ -175,7 +175,7 @@ public class UnpackerResource {
IOUtils.copy(inputStream, bos);
byte[] data = bos.toByteArray();
- String name = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
+ String name = metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY);
String contentType = metadata.get(org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE);
if (name == null) {
diff --git a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
index 7cd5f1d..bdba9a8 100644
--- a/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/MetadataResourceTest.java
@@ -38,6 +38,7 @@ import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadata;
import org.apache.tika.server.resource.MetadataResource;
import org.apache.tika.server.writer.CSVMessageBodyWriter;
@@ -87,9 +88,11 @@ public class MetadataResourceTest extends CXFTestBase {
metadata.put(nextLine[0], nextLine[1]);
}
csvReader.close();
-
- assertNotNull(metadata.get("Author"));
- assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+ for (String n : metadata.keySet()) {
+ System.out.println(n + " : "+metadata.get(n));
+ }
+ assertNotNull(metadata.get(TikaCoreProperties.CREATOR.getName()));
+ assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR.getName()));
assertEquals("X-TIKA:digest:MD5", "f8be45c34e8919eedba48cc8d207fbf0",
metadata.get("X-TIKA:digest:MD5"));
}
@@ -139,8 +142,8 @@ public class MetadataResourceTest extends CXFTestBase {
}
csvReader.close();
- assertNotNull(metadata.get("Author"));
- assertEquals("pavel", metadata.get("Author"));
+ assertNotNull(metadata.get(TikaCoreProperties.CREATOR.getName()));
+ assertEquals("pavel", metadata.get(TikaCoreProperties.CREATOR.getName()));
}
@Test
@@ -155,8 +158,8 @@ public class MetadataResourceTest extends CXFTestBase {
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
Metadata metadata = JsonMetadata.fromJson(reader);
- assertNotNull(metadata.get("Author"));
- assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+ assertNotNull(metadata.get(TikaCoreProperties.CREATOR));
+ assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR));
}
@Test
@@ -195,7 +198,8 @@ public class MetadataResourceTest extends CXFTestBase {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
- Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
+ Response response = WebClient.create(endPoint + META_PATH + "/"+TikaCoreProperties.CREATOR.getName())
+ .type("application/msword")
.accept(MediaType.TEXT_PLAIN).put(copy(stream, 12000));
Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
String s = IOUtils.readStringFromStream((InputStream) response.getEntity());
@@ -207,12 +211,13 @@ public class MetadataResourceTest extends CXFTestBase {
InputStream stream = ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_DOC);
- Response response = WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
+ Response response = WebClient.create(endPoint + META_PATH + "/"+TikaCoreProperties.CREATOR.getName())
+ .type("application/msword")
.accept(MediaType.APPLICATION_JSON).put(copy(stream, 12000));
Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());
Metadata metadata = JsonMetadata.fromJson(new InputStreamReader(
(InputStream) response.getEntity(), UTF_8));
- assertEquals("Maxim Valyanskiy", metadata.get("Author"));
+ assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR));
assertEquals(1, metadata.names().length);
}
diff --git a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
index 56910a9..783d622 100644
--- a/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
+++ b/tika-server/src/test/java/org/apache/tika/server/RecursiveMetadataResourceTest.java
@@ -36,6 +36,8 @@ import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadataList;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.server.resource.RecursiveMetadataResource;
@@ -80,7 +82,7 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
assertEquals(12, metadataList.size());
- assertEquals("Microsoft Office Word", metadataList.get(0).get("Application-Name"));
+ assertEquals("Microsoft Office Word", metadataList.get(0).get(OfficeOpenXMLExtended.APPLICATION));
assertContains("plundered our seas", metadataList.get(6).get("X-TIKA:content"));
assertEquals("a38e6c7b38541af87148dee9634cb811", metadataList.get(10).get("X-TIKA:digest:MD5"));
@@ -112,8 +114,8 @@ public class RecursiveMetadataResourceTest extends CXFTestBase {
// Check results
Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
List<Metadata> metadataList = JsonMetadataList.fromJson(reader);
- assertNotNull(metadataList.get(0).get("Author"));
- assertEquals("pavel", metadataList.get(0).get("Author"));
+ assertNotNull(metadataList.get(0).get(TikaCoreProperties.CREATOR));
+ assertEquals("pavel", metadataList.get(0).get(TikaCoreProperties.CREATOR));
}
@Test
diff --git a/tika-xmp/src/main/java/org/apache/tika/xmp/XMPMetadata.java b/tika-xmp/src/main/java/org/apache/tika/xmp/XMPMetadata.java
index 6791ad7..12b7850 100644
--- a/tika-xmp/src/main/java/org/apache/tika/xmp/XMPMetadata.java
+++ b/tika-xmp/src/main/java/org/apache/tika/xmp/XMPMetadata.java
@@ -31,6 +31,7 @@ import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.Property.PropertyType;
import org.apache.tika.metadata.PropertyTypeException;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.xmp.convert.TikaToXMP;
import com.adobe.xmp.XMPDateTime;
@@ -642,7 +643,7 @@ public class XMPMetadata extends Metadata {
* or too much of them
*/
private String[] splitKey(String key) {
- String[] keyParts = key.split( Metadata.NAMESPACE_PREFIX_DELIMITER );
+ String[] keyParts = key.split( TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER );
if (keyParts.length > 0 && keyParts.length <= 2) {
return keyParts;
}
diff --git a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/GenericConverter.java b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/GenericConverter.java
index a2781ef..1ae8016 100644
--- a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/GenericConverter.java
+++ b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/GenericConverter.java
@@ -24,6 +24,7 @@ import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMPRights;
import org.apache.tika.metadata.Property.PropertyType;
@@ -51,7 +52,7 @@ public class GenericConverter extends AbstractConverter {
String[] keys = metadata.names();
for (String key : keys) {
- String[] keyParts = key.split( Metadata.NAMESPACE_PREFIX_DELIMITER );
+ String[] keyParts = key.split( TikaCoreProperties.NAMESPACE_PREFIX_DELIMITER );
if (keyParts.length > 0 && keyParts.length <= 2) {
String uri = registry.getNamespaceURI( keyParts[0] );
diff --git a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeBinaryConverter.java b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeBinaryConverter.java
index 2076329..888886a 100644
--- a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeBinaryConverter.java
+++ b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeBinaryConverter.java
@@ -66,7 +66,7 @@ public class MSOfficeBinaryConverter extends AbstractConverter {
createProperty( TikaCoreProperties.COMMENTS, XMPConst.NS_PDFX, "Comments" );
createProperty( OfficeOpenXMLExtended.COMPANY, OfficeOpenXMLExtended.NAMESPACE_URI,
"Company" );
- createCommaSeparatedArray( TikaCoreProperties.KEYWORDS, XMPConst.NS_DC, "subject",
+ createCommaSeparatedArray( TikaCoreProperties.SUBJECT, XMPConst.NS_DC, "subject",
PropertyOptions.ARRAY );
createLangAltProperty( TikaCoreProperties.DESCRIPTION, XMPConst.NS_DC, "description" );
createProperty( TikaCoreProperties.LANGUAGE, OfficeOpenXMLCore.NAMESPACE_URI, "language" );
diff --git a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeXMLConverter.java b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeXMLConverter.java
index 600e69c..b5463c1 100644
--- a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeXMLConverter.java
+++ b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/MSOfficeXMLConverter.java
@@ -63,7 +63,7 @@ public class MSOfficeXMLConverter extends AbstractConverter {
PropertyOptions.ARRAY_ORDERED );
createProperty( TikaCoreProperties.COMMENTS, XMPConst.NS_PDFX, "Comments" );
createProperty( TikaCoreProperties.IDENTIFIER, XMPConst.NS_DC, "identifier" );
- createCommaSeparatedArray( TikaCoreProperties.KEYWORDS, XMPConst.NS_DC, "subject",
+ createCommaSeparatedArray( TikaCoreProperties.SUBJECT, XMPConst.NS_DC, "subject",
PropertyOptions.ARRAY );
createLangAltProperty( TikaCoreProperties.DESCRIPTION, XMPConst.NS_DC, "description" );
createProperty( TikaCoreProperties.LANGUAGE, XMPConst.NS_DC, "language" );
diff --git a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/OpenDocumentConverter.java b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/OpenDocumentConverter.java
index 3c58570..2ad5a5c 100644
--- a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/OpenDocumentConverter.java
+++ b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/OpenDocumentConverter.java
@@ -23,9 +23,10 @@ import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.HttpHeaders;
-import org.apache.tika.metadata.MSOffice;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
+import org.apache.tika.metadata.OfficeOpenXMLCore;
+import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.PagedText;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -64,10 +65,10 @@ public class OpenDocumentConverter extends AbstractConverter {
PropertyOptions.ARRAY_ORDERED );
createProperty( TikaCoreProperties.MODIFIED, XMPConst.NS_XMP, "ModifyDate" );
createProperty( TikaCoreProperties.COMMENTS, XMPConst.NS_PDFX, "Comments" );
- createCommaSeparatedArray( TikaCoreProperties.KEYWORDS, XMPConst.NS_DC, "subject",
+ createCommaSeparatedArray( TikaCoreProperties.SUBJECT, XMPConst.NS_DC, "subject",
PropertyOptions.ARRAY );
createLangAltProperty( TikaCoreProperties.DESCRIPTION, XMPConst.NS_DC, "description" );
- createProperty( MSOffice.EDIT_TIME, Office.NAMESPACE_URI_DOC_META, "editing-duration" );
+ createProperty( OfficeOpenXMLExtended.TOTAL_TIME, Office.NAMESPACE_URI_DOC_META, "editing-duration" );
createProperty( "editing-cycles", Office.NAMESPACE_URI_DOC_META, "editing-cycles" );
createProperty( "generator", XMPConst.NS_XMP, "CreatorTool" );
createProperty( Office.IMAGE_COUNT, Office.NAMESPACE_URI_DOC_META, "image-count" );
diff --git a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/RTFConverter.java b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/RTFConverter.java
index 19d7b50..148467f 100644
--- a/tika-xmp/src/main/java/org/apache/tika/xmp/convert/RTFConverter.java
+++ b/tika-xmp/src/main/java/org/apache/tika/xmp/convert/RTFConverter.java
@@ -55,7 +55,7 @@ public class RTFConverter extends AbstractConverter {
PropertyOptions.ARRAY_ORDERED );
createLangAltProperty( TikaCoreProperties.TITLE, XMPConst.NS_DC, "title" );
createLangAltProperty( TikaCoreProperties.DESCRIPTION, XMPConst.NS_DC, "description" );
- createCommaSeparatedArray( TikaCoreProperties.KEYWORDS, XMPConst.NS_DC, "subject",
+ createCommaSeparatedArray( TikaCoreProperties.SUBJECT, XMPConst.NS_DC, "subject",
PropertyOptions.ARRAY );
createProperty( OfficeOpenXMLCore.CATEGORY, XMPConst.NS_IPTCCORE, "intellectualGenre" );
createProperty( OfficeOpenXMLExtended.TEMPLATE, OfficeOpenXMLExtended.NAMESPACE_URI,
diff --git a/tika-xmp/src/test/java/org/apache/tika/xmp/TikaToXMPTest.java b/tika-xmp/src/test/java/org/apache/tika/xmp/TikaToXMPTest.java
index eb21964..860a669 100644
--- a/tika-xmp/src/test/java/org/apache/tika/xmp/TikaToXMPTest.java
+++ b/tika-xmp/src/test/java/org/apache/tika/xmp/TikaToXMPTest.java
@@ -60,7 +60,7 @@ public class TikaToXMPTest {
// language alternative
metadata.set( TikaCoreProperties.TITLE, "title" );
// comma separated array
- metadata.set( TikaCoreProperties.KEYWORDS, "keyword1,keyword2" );
+ metadata.set( TikaCoreProperties.SUBJECT, "keyword1,keyword2" );
// OOXML specific simple prop
metadata.set( TikaCoreProperties.MODIFIER, "lastModifiedBy" );
}
@@ -146,7 +146,7 @@ public class TikaToXMPTest {
// language alternative
tikaMetadata.set( TikaCoreProperties.TITLE, "title" );
// array
- tikaMetadata.set( TikaCoreProperties.KEYWORDS, new String[] { "keyword1", "keyword2" } );
+ tikaMetadata.set( TikaCoreProperties.SUBJECT, new String[] { "keyword1", "keyword2" } );
XMPMeta xmp = TikaToXMP.convert( tikaMetadata, null );
diff --git a/tika-xmp/src/test/java/org/apache/tika/xmp/XMPMetadataTest.java b/tika-xmp/src/test/java/org/apache/tika/xmp/XMPMetadataTest.java
index d57cd3e..5240010 100644
--- a/tika-xmp/src/test/java/org/apache/tika/xmp/XMPMetadataTest.java
+++ b/tika-xmp/src/test/java/org/apache/tika/xmp/XMPMetadataTest.java
@@ -63,7 +63,7 @@ public class XMPMetadataTest {
// language alternative
metadata.set( TikaCoreProperties.TITLE, "title" );
// array
- metadata.set( TikaCoreProperties.KEYWORDS, new String[] { "keyword1", "keyword2" } );
+ metadata.set( TikaCoreProperties.SUBJECT, new String[] { "keyword1", "keyword2" } );
// date
metadata.set( TikaCoreProperties.MODIFIED, "2001-01-01T01:01" );
// int simple property
@@ -109,7 +109,7 @@ public class XMPMetadataTest {
public void isMultiValued_multiProp_true() throws TikaException {
xmpMeta.process( tikaMetadata );
- assertTrue( xmpMeta.isMultiValued( TikaCoreProperties.KEYWORDS ) );
+ assertTrue( xmpMeta.isMultiValued( TikaCoreProperties.SUBJECT) );
}
@Test
@@ -130,7 +130,7 @@ public class XMPMetadataTest {
public void get_arrayProp_firstValueReturned() throws TikaException {
xmpMeta.process( tikaMetadata );
- assertEquals( "keyword1", xmpMeta.get( TikaCoreProperties.KEYWORDS ) );
+ assertEquals( "keyword1", xmpMeta.get( TikaCoreProperties.SUBJECT) );
}
@Test
@@ -173,7 +173,7 @@ public class XMPMetadataTest {
public void getValues_arrayProperty_allElementsReturned() throws TikaException {
xmpMeta.process( tikaMetadata );
- String[] values = xmpMeta.getValues( TikaCoreProperties.KEYWORDS );
+ String[] values = xmpMeta.getValues( TikaCoreProperties.SUBJECT);
assertEquals( 2, values.length );
checkArrayValues( values, "keyword" );
@@ -183,13 +183,13 @@ public class XMPMetadataTest {
public void testSetAll() {
Properties props = new Properties();
props.put( TikaCoreProperties.FORMAT.getName(), "format" );
- props.put( TikaCoreProperties.KEYWORDS.getName(), "keyword" );
+ props.put( TikaCoreProperties.SUBJECT.getName(), "keyword" );
xmpMeta.setAll( props );
assertEquals( "format", xmpMeta.get( TikaCoreProperties.FORMAT ) );
- String[] values = xmpMeta.getValues( TikaCoreProperties.KEYWORDS );
+ String[] values = xmpMeta.getValues( TikaCoreProperties.SUBJECT);
assertEquals( 1, values.length );
assertEquals( "keyword", values[0] );
@@ -220,9 +220,9 @@ public class XMPMetadataTest {
@Test
public void set_arrayProperty_ok() {
- xmpMeta.set( TikaCoreProperties.KEYWORDS, new String[] { "keyword1", "keyword2" } );
+ xmpMeta.set( TikaCoreProperties.SUBJECT, new String[] { "keyword1", "keyword2" } );
- String[] values = xmpMeta.getValues( TikaCoreProperties.KEYWORDS );
+ String[] values = xmpMeta.getValues( TikaCoreProperties.SUBJECT);
assertEquals( 2, values.length );
checkArrayValues( values, "keyword" );
--
To stop receiving notification emails like this one, please contact
tallison@apache.org.