You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/01/27 15:45:06 UTC
svn commit: r738100 - in /lucene/tika/trunk/src:
main/java/org/apache/tika/detect/ test/java/org/apache/tika/detect/
Author: jukka
Date: Tue Jan 27 14:45:06 2009
New Revision: 738100
URL: http://svn.apache.org/viewvc?rev=738100&view=rev
Log:
TIKA-95: Pluggable magic header detectors
The Detector interface allows the input stream to be null.
The MagicDetector and TextDetector classes now handle that case by
returning MediaType.OCTET_STREAM when the input stream is null.
Modified:
lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
Modified: lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java Tue Jan 27 14:45:06 2009
@@ -151,8 +151,17 @@
}
}
+ /**
+ *
+ * @param input document input stream, or <code>null</code>
+ * @param metadata ignored
+ */
public MediaType detect(InputStream input, Metadata metadata)
throws IOException {
+ if (input == null) {
+ return MediaType.OCTET_STREAM;
+ }
+
long offset = 0;
// Skip bytes at the beginning, using skip() or read()
Modified: lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java Tue Jan 27 14:45:06 2009
@@ -80,13 +80,16 @@
* Looks at the beginning of the document input stream to determine
* whether the document is text or not.
*
- * @param input document input stream
+ * @param input document input stream, or <code>null</code>
* @param metadata ignored
* @return "text/plain" if the input stream suggest a text document,
* "application/octet-stream" otherwise
*/
public MediaType detect(InputStream input, Metadata metadata)
throws IOException {
+ if (input == null) {
+ return MediaType.OCTET_STREAM;
+ }
for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
int ch = input.read();
if (ch == -1) {
Modified: lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java Tue Jan 27 14:45:06 2009
@@ -29,6 +29,14 @@
*/
public class MagicDetectorTest extends TestCase {
+ public void testDetectNull() throws Exception {
+ MediaType html = new MediaType("text", "html");
+ Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ }
+
public void testDetectSimple() throws Exception {
MediaType html = new MediaType("text", "html");
Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
Modified: lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java Tue Jan 27 14:45:06 2009
@@ -32,6 +32,12 @@
private final Detector detector = new TextDetector();
+ public void testDetectNull() throws Exception {
+ assertEquals(
+ MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+ }
+
public void testDetectText() throws Exception {
assertText(new byte[0]);