You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2014/11/26 17:12:11 UTC
svn commit: r1641841 - in /tika/trunk:
tika-app/src/test/java/org/apache/tika/cli/
tika-core/src/main/java/org/apache/tika/metadata/
tika-core/src/main/java/org/apache/tika/parser/
tika-parsers/src/main/java/org/apache/tika/parser/pdf/
Author: tallison
Date: Wed Nov 26 16:12:11 2014
New Revision: 1641841
URL: http://svn.apache.org/r1641841
Log:
TIKA-1488: add X-TIKA as the namespace prefix for metadata about the parsing process
Modified:
tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1641841&r1=1641840&r2=1641841&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Wed Nov 26 16:12:11 2014
@@ -356,10 +356,8 @@ public class TikaCLITest {
" \"Application-Version\": \"15.0000\",\n" +
" \"Character Count\": \"28\",\n" +
" \"Character-Count-With-Spaces\": \"31\","));
- assertTrue(content.endsWith(" \"tika:embedded_resource_path\": \"test_recursive_embedded.docx/embed1.zip\"\n" +
- " }\n" +
- "]"));
- assertFalse(content.contains("tika:content"));
+ assertTrue(content.contains("\"X-TIKA:embedded_resource_path\": \"test_recursive_embedded.docx/embed1.zip\""));
+ assertFalse(content.contains("X-TIKA:content"));
}
@@ -368,7 +366,7 @@ public class TikaCLITest {
String[] params = new String[]{"-J", "-r", resourcePrefix+"test_recursive_embedded.docx"};
TikaCLI.main(params);
String content = outContent.toString("UTF-8");
- assertTrue(content.contains("\"tika:content\": \"\\u003chtml xmlns\\u003d\\\"http://www.w3.org/1999/xhtml"));
+ assertTrue(content.contains("\"X-TIKA:content\": \"\\u003chtml xmlns\\u003d\\\"http://www.w3.org/1999/xhtml"));
}
@Test
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java?rev=1641841&r1=1641840&r2=1641841&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/TikaCoreProperties.java Wed Nov 26 16:12:11 2014
@@ -51,8 +51,21 @@ public interface TikaCoreProperties {
ATTACHMENT
};
+ /**
+ * Use this to prefix metadata properties that store information
+ * about the parsing process. Users should be able to distinguish
+ * between metadata that was contained within the document and
+ * metadata about the parsing process.
+ * TODO: in Tika 2.0 (or earlier, if feasible), rename X-ParsedBy to X-TIKA-Parsed-By so that it follows this prefix convention.
+ */
+ public static String TIKA_META_PREFIX = "X-TIKA"+Metadata.NAMESPACE_PREFIX_DELIMITER;
/**
+ * Use this to store parse exception information in the Metadata object.
+ */
+ public static String TIKA_META_EXCEPTION_PREFIX = TIKA_META_PREFIX+"EXCEPTION"+
+ Metadata.NAMESPACE_PREFIX_DELIMITER;
+ /**
* @see DublinCore#FORMAT
*/
public static final Property FORMAT = Property.composite(DublinCore.FORMAT,
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java?rev=1641841&r1=1641840&r2=1641841&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/RecursiveParserWrapper.java Wed Nov 26 16:12:11 2014
@@ -17,10 +17,18 @@ package org.apache.tika.parser;
* limitations under the License.
*/
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.FilenameUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
+import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.ContentHandlerFactory;
@@ -28,13 +36,6 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Date;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-
/**
* This is a helper class that wraps a parser in a recursive handler.
* It takes care of setting the embedded parser in the ParseContext
@@ -75,25 +76,17 @@ public class RecursiveParserWrapper impl
*/
private static final long serialVersionUID = 9086536568120690938L;
-
-
- public final static String TIKA_PREFIX = "tika:";
- public final static String TIKA_EXCEPTION_PREFIX = "tika_ex:";
-
//move this to TikaCoreProperties?
- public final static Property TIKA_CONTENT = Property.internalText(TIKA_PREFIX+"content");
- public final static Property PARSE_TIME_MILLIS = Property.internalText(TIKA_PREFIX+"parse_time_millis");
+ public final static Property TIKA_CONTENT = Property.internalText(TikaCoreProperties.TIKA_META_PREFIX+"content");
+ public final static Property PARSE_TIME_MILLIS = Property.internalText(TikaCoreProperties.TIKA_META_PREFIX+"parse_time_millis");
public final static Property WRITE_LIMIT_REACHED =
- Property.internalBoolean(TIKA_EXCEPTION_PREFIX+"write_limit_reached");
+ Property.internalBoolean(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX+"write_limit_reached");
public final static Property EMBEDDED_RESOURCE_LIMIT_REACHED =
- Property.internalBoolean(TIKA_EXCEPTION_PREFIX+"embedded_resource_limit_reached");
-
- public final static Property PARSE_EXCEPTION =
- Property.internalBoolean(TIKA_EXCEPTION_PREFIX+"parse_exception");
+ Property.internalBoolean(TikaCoreProperties.TIKA_META_EXCEPTION_PREFIX+"embedded_resource_limit_reached");
//move this to TikaCoreProperties?
public final static Property EMBEDDED_RESOURCE_PATH =
- Property.internalText(TIKA_PREFIX+"embedded_resource_path");
+ Property.internalText(TikaCoreProperties.TIKA_META_PREFIX+"embedded_resource_path");
private final Parser wrappedParser;
private final ContentHandlerFactory contentHandlerFactory;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1641841&r1=1641840&r2=1641841&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Wed Nov 26 16:12:11 2014
@@ -254,7 +254,7 @@ public class PDFParser extends AbstractP
// TODO WARN if this XMP version is inconsistent with document header version?
}
} catch (IOException e) {
- metadata.set("pdf:metadata-xmp-parse-failed", ""+e);
+ metadata.set(TikaCoreProperties.TIKA_META_PREFIX+"pdf:metadata-xmp-parse-failed", ""+e);
}
//TODO: Let's try to move this into PDFBox.
//Attempt to determine Adobe extension level, if present: