You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/01/27 15:45:06 UTC

svn commit: r738100 - in /lucene/tika/trunk/src: main/java/org/apache/tika/detect/ test/java/org/apache/tika/detect/

Author: jukka
Date: Tue Jan 27 14:45:06 2009
New Revision: 738100

URL: http://svn.apache.org/viewvc?rev=738100&view=rev
Log:
TIKA-95: Pluggable magic header detectors

The Detector interface allows the input stream to be null.
The MagicDetector and TextDetector classes now handle a null input stream
by returning MediaType.OCTET_STREAM instead of trying to read from it.

Modified:
    lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
    lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
    lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
    lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/MagicDetector.java Tue Jan 27 14:45:06 2009
@@ -151,8 +151,17 @@
         }
     }
 
+    /**
+     * 
+     * @param input document input stream, or <code>null</code>
+     * @param metadata ignored
+     */
     public MediaType detect(InputStream input, Metadata metadata)
             throws IOException {
+        if (input == null) {
+            return MediaType.OCTET_STREAM;
+        }
+
         long offset = 0;
 
         // Skip bytes at the beginning, using skip() or read()

Modified: lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java (original)
+++ lucene/tika/trunk/src/main/java/org/apache/tika/detect/TextDetector.java Tue Jan 27 14:45:06 2009
@@ -80,13 +80,16 @@
      * Looks at the beginning of the document input stream to determine
      * whether the document is text or not.
      *
-     * @param input document input stream
+     * @param input document input stream, or <code>null</code>
      * @param metadata ignored
      * @return "text/plain" if the input stream suggest a text document,
      *         "application/octet-stream" otherwise
      */
     public MediaType detect(InputStream input, Metadata metadata)
             throws IOException {
+        if (input == null) {
+            return MediaType.OCTET_STREAM;
+        }
         for (int i = 0; i < NUMBER_OF_BYTES_TO_TEST; i++) {
             int ch = input.read();
             if (ch == -1) {

Modified: lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/MagicDetectorTest.java Tue Jan 27 14:45:06 2009
@@ -29,6 +29,14 @@
  */
 public class MagicDetectorTest extends TestCase {
 
+    public void testDetectNull() throws Exception {
+        MediaType html = new MediaType("text", "html");
+        Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));
+        assertEquals(
+                MediaType.OCTET_STREAM,
+                detector.detect(null, new Metadata()));
+    }
+
     public void testDetectSimple() throws Exception {
         MediaType html = new MediaType("text", "html");
         Detector detector = new MagicDetector(html, "<html".getBytes("ASCII"));

Modified: lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java?rev=738100&r1=738099&r2=738100&view=diff
==============================================================================
--- lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java (original)
+++ lucene/tika/trunk/src/test/java/org/apache/tika/detect/TextDetectorTest.java Tue Jan 27 14:45:06 2009
@@ -32,6 +32,12 @@
 
     private final Detector detector = new TextDetector();
 
+    public void testDetectNull() throws Exception {
+        assertEquals(
+                MediaType.OCTET_STREAM,
+                detector.detect(null, new Metadata()));
+    }
+
     public void testDetectText() throws Exception {
         assertText(new byte[0]);