You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/05/29 16:36:24 UTC

svn commit: r1682489 [11/14] - in /tika/trunk: tika-parsers/src/main/java/org/apache/tika/parser/html/ tika-parsers/src/main/java/org/apache/tika/parser/image/ tika-parsers/src/main/java/org/apache/tika/parser/image/xmp/ tika-parsers/src/main/java/org/...

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java Fri May 29 14:36:21 2015
@@ -33,15 +33,15 @@ import org.junit.Test;
 import org.xml.sax.helpers.DefaultHandler;
 
 public class JpegParserTest {
-  
+
     private final Parser parser = new JpegParser();
-    
+
     @Test
     public void testJPEG() throws Exception {
         Metadata metadata = new Metadata();
         metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
         InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
+                getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
 
         // Core EXIF/TIFF tags
@@ -49,7 +49,7 @@ public class JpegParserTest {
         assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
         assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
         assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-        
+
         assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
         assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
         assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
@@ -62,24 +62,24 @@ public class JpegParserTest {
         assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
         assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
         assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
-        
+
         // Check that EXIF/TIFF tags come through with their raw values too
         // (This may be removed for Tika 1.0, as we support more of them
         //  with explicit Metadata entries)
         assertEquals("Canon EOS 40D", metadata.get("Model"));
-        
+
         // Common tags
         //assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
         assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
                 "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
         List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
         assertTrue("'canon-55-250' expected in " + keywords, keywords.contains("canon-55-250"));
-        assertTrue("'moscow-birds' expected in " + keywords, keywords.contains("moscow-birds")); 
+        assertTrue("'moscow-birds' expected in " + keywords, keywords.contains("moscow-birds"));
         assertTrue("'serbor' expected in " + keywords, keywords.contains("serbor"));
         assertFalse(keywords.contains("canon-55-250 moscow-birds serbor"));
         List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
         assertTrue("'canon-55-250' expected in " + subject, subject.contains("canon-55-250"));
-        assertTrue("'moscow-birds' expected in " + subject, subject.contains("moscow-birds")); 
+        assertTrue("'moscow-birds' expected in " + subject, subject.contains("moscow-birds"));
         assertTrue("'serbor' expected in " + subject, subject.contains("serbor"));
         assertFalse(subject.contains("canon-55-250 moscow-birds serbor"));
     }
@@ -92,19 +92,19 @@ public class JpegParserTest {
         Metadata metadata = new Metadata();
         metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
         InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
+                getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-        
+
         // Geo tags
         assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
         assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
-        
+
         // Core EXIF/TIFF tags
         assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
         assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
         assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
         assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-        
+
         assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
         assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
         assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
@@ -117,7 +117,7 @@ public class JpegParserTest {
         assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
         assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
         assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
-        
+
         // Common tags
         assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
                 "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
@@ -131,48 +131,48 @@ public class JpegParserTest {
 
     /**
      * Test for an image with the geographic information stored in a slightly
-     *  different way, see TIKA-915 for details
+     * different way, see TIKA-915 for details
      * Disabled for now, pending a fix to the underlying library
      */
     @Test
     public void testJPEGGeo2() throws Exception {
-       Metadata metadata = new Metadata();
-       metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
-       InputStream stream =
-          getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg");
-       parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
-       // Geo tags should be there with 5dp, and not rounded
-       assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
-       assertEquals("-1.567886", metadata.get(Metadata.LONGITUDE));
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+                getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+        // Geo tags should be there with 5dp, and not rounded
+        assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
+        assertEquals("-1.567886", metadata.get(Metadata.LONGITUDE));
     }
-    
+
     @Test
     public void testJPEGTitleAndDescription() throws Exception {
         Metadata metadata = new Metadata();
         metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
         InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
+                getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-          
+
         // embedded comments with non-ascii characters
         assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
         assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
         assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR)); // Dublin Core
         // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
         // but we have to replace them with underscore
-        
+
         List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
         assertTrue(keywords.contains("coast"));
         assertTrue(keywords.contains("bird watching"));
         assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
-        
+
         // Core EXIF/TIFF tags
         assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
         assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
         assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
         assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-        
+
         assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000
         assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
         assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
@@ -183,35 +183,35 @@ public class JpegParserTest {
         assertEquals("1", metadata.get(Metadata.ORIENTATION)); // Not present
         assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
         assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
-        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));          
+        assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
     }
-    
+
     @Test
     public void testJPEGTitleAndDescriptionPhotoshop() throws Exception {
         Metadata metadata = new Metadata();
         metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
         InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg");
+                getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg");
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-          
+
         // embedded comments with non-ascii characters
         assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
         assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
         assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
         List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
-        assertTrue("got " + keywords, keywords.contains("bird watching")); 
+        assertTrue("got " + keywords, keywords.contains("bird watching"));
         List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
-        assertTrue("got " + subject, subject.contains("bird watching")); 
+        assertTrue("got " + subject, subject.contains("bird watching"));
     }
-    
+
     @Test
     public void testJPEGTitleAndDescriptionXnviewmp() throws Exception {
         Metadata metadata = new Metadata();
         metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
         InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg");
+                getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg");
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-          
+
         // XnViewMp's default comment dialog has only comment, not headline.
         // Comment is embedded only if "Write comments in XMP" is enabled in settings
         assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
@@ -220,31 +220,31 @@ public class JpegParserTest {
         String[] subject = metadata.getValues(TikaCoreProperties.KEYWORDS);
         List<String> keywords = Arrays.asList(subject);
         assertTrue("'coast'" + " not in " + keywords, keywords.contains("coast"));
-        assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve"));     
+        assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve"));
     }
-    
+
     @Test
     public void testJPEGoddTagComponent() throws Exception {
-       Metadata metadata = new Metadata();
-       metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
-       InputStream stream =
-           getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg");
-       parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-       
-       assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
-       assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
-       assertEquals("251", metadata.get(Metadata.IMAGE_WIDTH));
-       assertEquals("384", metadata.get(Metadata.IMAGE_LENGTH));
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+        InputStream stream =
+                getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg");
+        parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+        assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
+        assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
+        assertEquals("251", metadata.get(Metadata.IMAGE_WIDTH));
+        assertEquals("384", metadata.get(Metadata.IMAGE_LENGTH));
     }
-    
+
     @Test
     public void testJPEGEmptyEXIFDateTime() throws Exception {
         Metadata metadata = new Metadata();
         metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
         InputStream stream =
-            getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
+                getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
         parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
         assertEquals("300.0", metadata.get(TIFF.RESOLUTION_HORIZONTAL));
         assertEquals("300.0", metadata.get(TIFF.RESOLUTION_VERTICAL));
-     }
+    }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java Fri May 29 14:36:21 2015
@@ -54,6 +54,13 @@ import org.xml.sax.helpers.DefaultHandle
 
 public class RFC822ParserTest extends TikaTest {
 
+    private static InputStream getStream(String name) {
+        InputStream stream = Thread.currentThread().getContextClassLoader()
+                .getResourceAsStream(name);
+        assertNotNull("Test file not found " + name, stream);
+        return stream;
+    }
+
     @Test
     public void testSimple() {
         Parser parser = new RFC822Parser();
@@ -73,9 +80,9 @@ public class RFC822ParserTest extends Ti
             verify(handler).endDocument();
             //note no leading spaces, and no quotes
             assertEquals("Julien Nioche (JIRA) <ji...@apache.org>", metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed", 
+            assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
                     metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed", 
+            assertEquals("[jira] Commented: (TIKA-461) RFC822 messages not parsed",
                     metadata.get(Metadata.SUBJECT));
         } catch (Exception e) {
             fail("Exception thrown: " + e.getMessage());
@@ -103,11 +110,11 @@ public class RFC822ParserTest extends Ti
             verify(handler, times(multipackExpectedTimes)).startElement(eq(XHTMLContentHandler.XHTML), eq("p"), eq("p"), any(Attributes.class));
             verify(handler, times(multipackExpectedTimes)).endElement(XHTMLContentHandler.XHTML, "p", "p");
             verify(handler).endDocument();
-            
+
         } catch (Exception e) {
             fail("Exception thrown: " + e.getMessage());
         }
-        
+
         //repeat, this time looking at content
         parser = new RFC822Parser();
         metadata = new Metadata();
@@ -172,35 +179,35 @@ public class RFC822ParserTest extends Ti
             parser.parse(stream, handler, metadata, new ParseContext());
             //tests correct decoding of internationalized headers, both
             //quoted-printable (Q) and Base64 (B).
-            assertEquals("Keld J\u00F8rn Simonsen <ke...@dkuug.dk>", 
+            assertEquals("Keld J\u00F8rn Simonsen <ke...@dkuug.dk>",
                     metadata.get(TikaCoreProperties.CREATOR));
-            assertEquals("If you can read this you understand the example.", 
+            assertEquals("If you can read this you understand the example.",
                     metadata.get(TikaCoreProperties.TITLE));
-            assertEquals("If you can read this you understand the example.", 
+            assertEquals("If you can read this you understand the example.",
                     metadata.get(Metadata.SUBJECT));
         } catch (Exception e) {
             fail("Exception thrown: " + e.getMessage());
         }
     }
-    
+
     /**
      * The from isn't in the usual form.
      * See TIKA-618
      */
     @Test
     public void testUnusualFromAddress() throws Exception {
-       Parser parser = new RFC822Parser();
-       Metadata metadata = new Metadata();
-       InputStream stream = getStream("test-documents/testRFC822_oddfrom");
-       ContentHandler handler = mock(DefaultHandler.class);
-
-       parser.parse(stream, handler, metadata, new ParseContext());
-       assertEquals("Saved by Windows Internet Explorer 7", 
-               metadata.get(TikaCoreProperties.CREATOR));
-       assertEquals("Air Permit Programs | Air & Radiation | US EPA", 
-               metadata.get(TikaCoreProperties.TITLE));
-       assertEquals("Air Permit Programs | Air & Radiation | US EPA", 
-               metadata.get(Metadata.SUBJECT));
+        Parser parser = new RFC822Parser();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("test-documents/testRFC822_oddfrom");
+        ContentHandler handler = mock(DefaultHandler.class);
+
+        parser.parse(stream, handler, metadata, new ParseContext());
+        assertEquals("Saved by Windows Internet Explorer 7",
+                metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("Air Permit Programs | Air & Radiation | US EPA",
+                metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("Air Permit Programs | Air & Radiation | US EPA",
+                metadata.get(Metadata.SUBJECT));
     }
 
     /**
@@ -236,36 +243,36 @@ public class RFC822ParserTest extends Ti
                 new ByteArrayInputStream(data), handler, metadata, context);
         assertEquals(name.trim(), metadata.get(TikaCoreProperties.CREATOR));
     }
-    
+
     /**
      * Test for TIKA-678 - not all headers may be present
      */
     @Test
     public void testSomeMissingHeaders() throws Exception {
-       Parser parser = new RFC822Parser();
-       Metadata metadata = new Metadata();
-       InputStream stream = getStream("test-documents/testRFC822-limitedheaders");
-       ContentHandler handler = new BodyContentHandler();
-
-       parser.parse(stream, handler, metadata, new ParseContext());
-       assertEquals(true, metadata.isMultiValued(TikaCoreProperties.CREATOR));
-       assertEquals("xyz", metadata.getValues(TikaCoreProperties.CREATOR)[0]);
-       assertEquals("abc", metadata.getValues(TikaCoreProperties.CREATOR)[1]);
-       assertEquals(true, metadata.isMultiValued(Metadata.MESSAGE_FROM));
-       assertEquals("xyz", metadata.getValues(Metadata.MESSAGE_FROM)[0]);
-       assertEquals("abc", metadata.getValues(Metadata.MESSAGE_FROM)[1]);
-       assertEquals(true, metadata.isMultiValued(Metadata.MESSAGE_TO));
-       assertEquals("abc", metadata.getValues(Metadata.MESSAGE_TO)[0]);
-       assertEquals("def", metadata.getValues(Metadata.MESSAGE_TO)[1]);
-       assertEquals("abcd", metadata.get(TikaCoreProperties.TITLE));
-       assertEquals("abcd", metadata.get(Metadata.SUBJECT));
-       assertContains("bar biz bat", handler.toString());
+        Parser parser = new RFC822Parser();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("test-documents/testRFC822-limitedheaders");
+        ContentHandler handler = new BodyContentHandler();
+
+        parser.parse(stream, handler, metadata, new ParseContext());
+        assertEquals(true, metadata.isMultiValued(TikaCoreProperties.CREATOR));
+        assertEquals("xyz", metadata.getValues(TikaCoreProperties.CREATOR)[0]);
+        assertEquals("abc", metadata.getValues(TikaCoreProperties.CREATOR)[1]);
+        assertEquals(true, metadata.isMultiValued(Metadata.MESSAGE_FROM));
+        assertEquals("xyz", metadata.getValues(Metadata.MESSAGE_FROM)[0]);
+        assertEquals("abc", metadata.getValues(Metadata.MESSAGE_FROM)[1]);
+        assertEquals(true, metadata.isMultiValued(Metadata.MESSAGE_TO));
+        assertEquals("abc", metadata.getValues(Metadata.MESSAGE_TO)[0]);
+        assertEquals("def", metadata.getValues(Metadata.MESSAGE_TO)[1]);
+        assertEquals("abcd", metadata.get(TikaCoreProperties.TITLE));
+        assertEquals("abcd", metadata.get(Metadata.SUBJECT));
+        assertContains("bar biz bat", handler.toString());
     }
-    
+
     /**
      * Test TIKA-1028 - If the mail contains an encrypted attachment (or
-     *  an attachment that others triggers an error), parsing should carry
-     *  on for the remainder regardless
+     * an attachment that others triggers an error), parsing should carry
+     * on for the remainder regardless
      */
     @Test
     public void testEncryptedZipAttachment() throws Exception {
@@ -275,40 +282,40 @@ public class RFC822ParserTest extends Ti
         InputStream stream = getStream("test-documents/testRFC822_encrypted_zip");
         ContentHandler handler = new BodyContentHandler();
         parser.parse(stream, handler, metadata, context);
-        
+
         // Check we go the metadata
         assertEquals("Juha Haaga <ju...@gmail.com>", metadata.get(Metadata.MESSAGE_FROM));
         assertEquals("Test mail for Tika", metadata.get(TikaCoreProperties.TITLE));
-        
+
         // Check we got the message text, for both Plain Text and HTML
         assertContains("Includes encrypted zip file", handler.toString());
         assertContains("password is \"test\".", handler.toString());
         assertContains("This is the Plain Text part", handler.toString());
         assertContains("This is the HTML part", handler.toString());
-        
+
         // We won't get the contents of the zip file, but we will get the name
         assertContains("text.txt", handler.toString());
         assertNotContained("ENCRYPTED ZIP FILES", handler.toString());
-        
+
         // Try again, this time with the password supplied
         // Check that we also get the zip's contents as well
         context.set(PasswordProvider.class, new PasswordProvider() {
             public String getPassword(Metadata metadata) {
                 return "test";
             }
-         });
+        });
         stream = getStream("test-documents/testRFC822_encrypted_zip");
         handler = new BodyContentHandler();
         parser.parse(stream, handler, metadata, context);
-        
+
         assertContains("Includes encrypted zip file", handler.toString());
         assertContains("password is \"test\".", handler.toString());
         assertContains("This is the Plain Text part", handler.toString());
         assertContains("This is the HTML part", handler.toString());
-        
+
         // We do get the name of the file in the encrypted zip file
         assertContains("text.txt", handler.toString());
-        
+
         // TODO Upgrade to a version of Commons Compress with Encryption
         //  support, then verify we get the contents of the text file
         //  held within the encrypted zip
@@ -317,10 +324,10 @@ public class RFC822ParserTest extends Ti
         assertContains("ENCRYPTED ZIP FILES", handler.toString());
         assertContains("TIKA-1028", handler.toString());
     }
-    
+
     /**
      * Test TIKA-1028 - Ensure we can get the contents of an
-     *  un-encrypted zip file
+     * un-encrypted zip file
      */
     @Test
     public void testNormalZipAttachment() throws Exception {
@@ -330,26 +337,26 @@ public class RFC822ParserTest extends Ti
         InputStream stream = getStream("test-documents/testRFC822_normal_zip");
         ContentHandler handler = new BodyContentHandler();
         parser.parse(stream, handler, metadata, context);
-        
+
         // Check we go the metadata
         assertEquals("Juha Haaga <ju...@gmail.com>", metadata.get(Metadata.MESSAGE_FROM));
         assertEquals("Test mail for Tika", metadata.get(TikaCoreProperties.TITLE));
-        
+
         // Check we got the message text, for both Plain Text and HTML
         assertContains("Includes a normal, unencrypted zip file", handler.toString());
         assertContains("This is the Plain Text part", handler.toString());
         assertContains("This is the HTML part", handler.toString());
-        
+
         // We get both name and contents of the zip file's contents
         assertContains("text.txt", handler.toString());
         assertContains("TEST DATA FOR TIKA.", handler.toString());
         assertContains("This is text inside an unencrypted zip file", handler.toString());
         assertContains("TIKA-1028", handler.toString());
     }
-    
+
     /**
      * TIKA-1222 When requested, ensure that the various attachments of
-     *  the mail come through properly as embedded resources
+     * the mail come through properly as embedded resources
      */
     @Test
     public void testGetAttachmentsAsEmbeddedResources() throws Exception {
@@ -364,11 +371,11 @@ public class RFC822ParserTest extends Ti
             if (tis != null)
                 tis.close();
         }
-        
+
         // Check we found all 3 parts
         assertEquals(3, tracker.filenames.size());
         assertEquals(3, tracker.mediaTypes.size());
-        
+
         // No filenames available
         assertEquals(null, tracker.filenames.get(0));
         assertEquals(null, tracker.filenames.get(1));
@@ -378,11 +385,4 @@ public class RFC822ParserTest extends Ti
         assertEquals(MediaType.TEXT_HTML, tracker.mediaTypes.get(1));
         assertEquals(MediaType.image("gif"), tracker.mediaTypes.get(2));
     }
-
-    private static InputStream getStream(String name) {
-        InputStream stream = Thread.currentThread().getContextClassLoader()
-                                    .getResourceAsStream(name);
-        assertNotNull("Test file not found " + name, stream);
-        return stream;
-    }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java Fri May 29 14:36:21 2015
@@ -35,137 +35,137 @@ import org.xml.sax.ContentHandler;
 
 public class MboxParserTest {
 
-  protected ParseContext recursingContext;
-  private Parser autoDetectParser;
-  private TypeDetector typeDetector;
-  private MboxParser mboxParser;
-
-  @Before
-  public void setUp() throws Exception {
-    typeDetector = new TypeDetector();
-    autoDetectParser = new AutoDetectParser(typeDetector);
-    recursingContext = new ParseContext();
-    recursingContext.set(Parser.class, autoDetectParser);
-
-    mboxParser = new MboxParser();
-    mboxParser.setTracking(true);
-  }
-
-  @Test
-  public void testSimple() throws Exception {
-    ContentHandler handler = new BodyContentHandler();
-    Metadata metadata = new Metadata();
-    InputStream stream = getStream("/test-documents/simple.mbox");
-
-    try {
-      mboxParser.parse(stream, handler, metadata, recursingContext);
-    } finally {
-      stream.close();
-    }
-
-    String content = handler.toString();
-    assertContains("Test content 1", content);
-    assertContains("Test content 2", content);
-    assertEquals("application/mbox", metadata.get(Metadata.CONTENT_TYPE));
-
-    Map<Integer, Metadata> mailsMetadata = mboxParser.getTrackingMetadata();
-    assertEquals("Nb. Of mails", 2, mailsMetadata.size());
-
-    Metadata mail1 = mailsMetadata.get(0);
-    assertEquals("message/rfc822", mail1.get(Metadata.CONTENT_TYPE));
-    assertEquals("envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009", mail1.get("MboxParser-from"));
-
-    Metadata mail2 = mailsMetadata.get(1);
-    assertEquals("message/rfc822", mail2.get(Metadata.CONTENT_TYPE));
-    assertEquals("envelope-sender-mailbox-name Mon Jun 01 11:00:00 2010", mail2.get("MboxParser-from"));
-  }
-
-  @Test
-  public void testHeaders() throws Exception {
-    ContentHandler handler = new BodyContentHandler();
-    Metadata metadata = new Metadata();
-    InputStream stream = getStream("/test-documents/headers.mbox");
-
-    try {
-      mboxParser.parse(stream, handler, metadata, recursingContext);
-    } finally {
-      stream.close();
-    }
-
-    assertContains("Test content", handler.toString());
-    assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
-
-    Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
-
-    assertEquals("2009-06-10T03:58:45Z", mailMetadata.get(TikaCoreProperties.CREATED));
-    assertEquals("<author@domain.com>", mailMetadata.get(TikaCoreProperties.CREATOR));
-    assertEquals("subject", mailMetadata.get(Metadata.SUBJECT));
-    assertEquals("<author@domain.com>", mailMetadata.get(Metadata.AUTHOR));
-    assertEquals("message/rfc822", mailMetadata.get(Metadata.CONTENT_TYPE));
-    assertEquals("author@domain.com", mailMetadata.get("Message-From"));
-    assertEquals("<na...@domain.com>", mailMetadata.get("MboxParser-return-path"));
-  }
-
-  @Test
-  public void testMultilineHeader() throws Exception {
-    ContentHandler handler = new BodyContentHandler();
-    Metadata metadata = new Metadata();
-    InputStream stream = getStream("/test-documents/multiline.mbox");
-
-    try {
-      mboxParser.parse(stream, handler, metadata, recursingContext);
-    } finally {
-      stream.close();
-    }
-
-    assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
-
-    Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
-    assertEquals("from xxx by xxx with xxx; date", mailMetadata.get("MboxParser-received"));
-  }
-
-  @Test
-  public void testQuoted() throws Exception {
-    ContentHandler handler = new BodyContentHandler();
-    Metadata metadata = new Metadata();
-    InputStream stream = getStream("/test-documents/quoted.mbox");
-
-    try {
-      mboxParser.parse(stream, handler, metadata, recursingContext);
-    } finally {
-      stream.close();
-    }
-
-    assertContains("Test content", handler.toString());
-    assertContains("> quoted stuff", handler.toString());
-  }
-
-  @Test
-  public void testComplex() throws Exception {
-    ContentHandler handler = new BodyContentHandler();
-    Metadata metadata = new Metadata();
-    InputStream stream = getStream("/test-documents/complex.mbox");
-
-    try {
-      mboxParser.parse(stream, handler, metadata, recursingContext);
-    } finally {
-      stream.close();
-    }
-
-    assertEquals("Nb. Of mails", 3, mboxParser.getTrackingMetadata().size());
-
-    Metadata firstMail = mboxParser.getTrackingMetadata().get(0);
-    assertEquals("Re: question about when shuffle/sort start working", firstMail.get(Metadata.SUBJECT));
-    assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.TITLE));
-    assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(Metadata.AUTHOR));
-    assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(TikaCoreProperties.CREATOR));
-    assertEquals("core-user@hadoop.apache.org", firstMail.get(Metadata.MESSAGE_RECIPIENT_ADDRESS));
-
-    assertContains("When a Mapper completes", handler.toString());
-  }
-
-  private static InputStream getStream(String name) {
-    return MboxParserTest.class.getClass().getResourceAsStream(name);
-  }
+    protected ParseContext recursingContext;
+    private Parser autoDetectParser;
+    private TypeDetector typeDetector;
+    private MboxParser mboxParser;
+
+    private static InputStream getStream(String name) {
+        return MboxParserTest.class.getClass().getResourceAsStream(name);
+    }
+
+    @Before
+    public void setUp() throws Exception {
+        typeDetector = new TypeDetector();
+        autoDetectParser = new AutoDetectParser(typeDetector);
+        recursingContext = new ParseContext();
+        recursingContext.set(Parser.class, autoDetectParser);
+
+        mboxParser = new MboxParser();
+        mboxParser.setTracking(true);
+    }
+
+    @Test
+    public void testSimple() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("/test-documents/simple.mbox");
+
+        try {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        } finally {
+            stream.close();
+        }
+
+        String content = handler.toString();
+        assertContains("Test content 1", content);
+        assertContains("Test content 2", content);
+        assertEquals("application/mbox", metadata.get(Metadata.CONTENT_TYPE));
+
+        Map<Integer, Metadata> mailsMetadata = mboxParser.getTrackingMetadata();
+        assertEquals("Nb. Of mails", 2, mailsMetadata.size());
+
+        Metadata mail1 = mailsMetadata.get(0);
+        assertEquals("message/rfc822", mail1.get(Metadata.CONTENT_TYPE));
+        assertEquals("envelope-sender-mailbox-name Mon Jun 01 10:00:00 2009", mail1.get("MboxParser-from"));
+
+        Metadata mail2 = mailsMetadata.get(1);
+        assertEquals("message/rfc822", mail2.get(Metadata.CONTENT_TYPE));
+        assertEquals("envelope-sender-mailbox-name Mon Jun 01 11:00:00 2010", mail2.get("MboxParser-from"));
+    }
+
+    @Test
+    public void testHeaders() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("/test-documents/headers.mbox");
+
+        try {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        } finally {
+            stream.close();
+        }
+
+        assertContains("Test content", handler.toString());
+        assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
+
+        Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
+
+        assertEquals("2009-06-10T03:58:45Z", mailMetadata.get(TikaCoreProperties.CREATED));
+        assertEquals("<author@domain.com>", mailMetadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("subject", mailMetadata.get(Metadata.SUBJECT));
+        assertEquals("<author@domain.com>", mailMetadata.get(Metadata.AUTHOR));
+        assertEquals("message/rfc822", mailMetadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("author@domain.com", mailMetadata.get("Message-From"));
+        assertEquals("<na...@domain.com>", mailMetadata.get("MboxParser-return-path"));
+    }
+
+    @Test
+    public void testMultilineHeader() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("/test-documents/multiline.mbox");
+
+        try {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("Nb. Of mails", 1, mboxParser.getTrackingMetadata().size());
+
+        Metadata mailMetadata = mboxParser.getTrackingMetadata().get(0);
+        assertEquals("from xxx by xxx with xxx; date", mailMetadata.get("MboxParser-received"));
+    }
+
+    @Test
+    public void testQuoted() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("/test-documents/quoted.mbox");
+
+        try {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        } finally {
+            stream.close();
+        }
+
+        assertContains("Test content", handler.toString());
+        assertContains("> quoted stuff", handler.toString());
+    }
+
+    @Test
+    public void testComplex() throws Exception {
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        InputStream stream = getStream("/test-documents/complex.mbox");
+
+        try {
+            mboxParser.parse(stream, handler, metadata, recursingContext);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("Nb. Of mails", 3, mboxParser.getTrackingMetadata().size());
+
+        Metadata firstMail = mboxParser.getTrackingMetadata().get(0);
+        assertEquals("Re: question about when shuffle/sort start working", firstMail.get(Metadata.SUBJECT));
+        assertEquals("Re: question about when shuffle/sort start working", firstMail.get(TikaCoreProperties.TITLE));
+        assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(Metadata.AUTHOR));
+        assertEquals("Jothi Padmanabhan <jo...@yahoo-inc.com>", firstMail.get(TikaCoreProperties.CREATOR));
+        assertEquals("core-user@hadoop.apache.org", firstMail.get(Metadata.MESSAGE_RECIPIENT_ADDRESS));
+
+        assertContains("When a Mapper completes", handler.toString());
+    }
 
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Fri May 29 14:36:21 2015
@@ -28,7 +28,7 @@ import org.apache.tika.mime.MediaType;
 
 /**
  * Parent class of tests that the various POI powered parsers are
- *  able to extract their embedded contents.
+ * able to extract their embedded contents.
  */
 public abstract class AbstractPOIContainerExtractionTest {
     public static final MediaType TYPE_DOC = MediaType.application("msword");
@@ -38,16 +38,24 @@ public abstract class AbstractPOIContain
     public static final MediaType TYPE_PPTX = MediaType.application("vnd.openxmlformats-officedocument.presentationml.presentation");
     public static final MediaType TYPE_XLSX = MediaType.application("vnd.openxmlformats-officedocument.spreadsheetml.sheet");
     public static final MediaType TYPE_MSG = MediaType.application("vnd.ms-outlook");
-    
+
     public static final MediaType TYPE_TXT = MediaType.text("plain");
     public static final MediaType TYPE_PDF = MediaType.application("pdf");
-    
+
     public static final MediaType TYPE_JPG = MediaType.image("jpeg");
     public static final MediaType TYPE_GIF = MediaType.image("gif");
     public static final MediaType TYPE_PNG = MediaType.image("png");
     public static final MediaType TYPE_EMF = MediaType.application("x-emf");
     public static final MediaType TYPE_WMF = MediaType.application("x-msmetafile");
 
+    protected static TikaInputStream getTestFile(String filename) throws Exception {
+        URL input = AbstractPOIContainerExtractionTest.class.getResource(
+                "/test-documents/" + filename);
+        assertNotNull(filename + " not found", input);
+
+        return TikaInputStream.get(input);
+    }
+
     protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
         TikaInputStream stream = getTestFile(filename);
         try {
@@ -55,7 +63,7 @@ public abstract class AbstractPOIContain
 
             // Process it
             TrackingHandler handler = new TrackingHandler();
-            if(recurse) {
+            if (recurse) {
                 extractor.extract(stream, extractor, handler);
             } else {
                 extractor.extract(stream, null, handler);
@@ -67,12 +75,4 @@ public abstract class AbstractPOIContain
             stream.close();
         }
     }
-    
-    protected static TikaInputStream getTestFile(String filename) throws Exception {
-        URL input = AbstractPOIContainerExtractionTest.class.getResource(
-               "/test-documents/" + filename);
-        assertNotNull(filename + " not found", input);
-
-        return TikaInputStream.get(input);
-    }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java Fri May 29 14:36:21 2015
@@ -5,9 +5,9 @@
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -60,15 +60,15 @@ public class ExcelParserTest {
             assertEquals("Simple Excel document", metadata.get(TikaCoreProperties.TITLE));
             assertEquals("Keith Bennett", metadata.get(TikaCoreProperties.CREATOR));
             assertEquals("Keith Bennett", metadata.get(Metadata.AUTHOR));
-            
+
             // Mon Oct 01 17:13:56 BST 2007
             assertEquals("2007-10-01T16:13:56Z", metadata.get(TikaCoreProperties.CREATED));
             assertEquals("2007-10-01T16:13:56Z", metadata.get(Metadata.CREATION_DATE));
-            
+
             // Mon Oct 01 17:31:43 BST 2007
             assertEquals("2007-10-01T16:31:43Z", metadata.get(TikaCoreProperties.MODIFIED));
             assertEquals("2007-10-01T16:31:43Z", metadata.get(Metadata.DATE));
-            
+
             String content = handler.toString();
             assertContains("Sample Excel Worksheet", content);
             assertContains("Numbers and their Squares", content);
@@ -115,7 +115,7 @@ public class ExcelParserTest {
             // Percentage.
             assertContains("2.50%", content);
             // Excel rounds up to 3%, but that requires Java 1.6 or later
-            if(System.getProperty("java.version").startsWith("1.5")) {
+            if (System.getProperty("java.version").startsWith("1.5")) {
                 assertContains("2%", content);
             } else {
                 assertContains("3%", content);
@@ -130,31 +130,31 @@ public class ExcelParserTest {
 
             // Date Format: m/d/yy
             assertContains("10/3/09", content);
-            
+
             // Date/Time Format: m/d/yy h:mm
             assertContains("1/19/08 4:35", content);
 
             // Fraction (2.5): # ?/?
             assertContains("2 1/2", content);
 
-            
+
             // Below assertions represent outstanding formatting issues to be addressed
             // they are included to allow the issues to be progressed with the Apache POI
             // team - See TIKA-103.
 
             /*************************************************************************
-            // Custom Number (0 "dollars and" .00 "cents")
-            assertContains("19 dollars and .99 cents", content);
+             // Custom Number (0 "dollars and" .00 "cents")
+             assertContains("19 dollars and .99 cents", content);
 
-            // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
-            assertContains("At 4:20 AM on Thursday May 17, 2007", content);
-            **************************************************************************/
+             // Custom Number ("At" h:mm AM/PM "on" dddd mmmm d"," yyyy)
+             assertContains("At 4:20 AM on Thursday May 17, 2007", content);
+             **************************************************************************/
 
         } finally {
             input.close();
         }
     }
-    
+
     @Test
     public void testExcelParserPassword() throws Exception {
         InputStream input = ExcelParserTest.class.getResourceAsStream(
@@ -191,11 +191,11 @@ public class ExcelParserTest {
             assertEquals(
                     "application/vnd.ms-excel",
                     metadata.get(Metadata.CONTENT_TYPE));
-            
+
             assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
             assertEquals("Antoni", metadata.get(TikaCoreProperties.CREATOR));
             assertEquals("2011-11-25T09:52:48Z", metadata.get(TikaCoreProperties.CREATED));
-            
+
             String content = handler.toString();
             assertContains("This is an Encrypted Excel spreadsheet", content);
             assertNotContained("9.0", content);
@@ -210,24 +210,24 @@ public class ExcelParserTest {
     @Test
     public void testExcelParserCharts() throws Exception {
         InputStream input = ExcelParserTest.class.getResourceAsStream(
-                  "/test-documents/testEXCEL-charts.xls");
+                "/test-documents/testEXCEL-charts.xls");
         try {
             Metadata metadata = new Metadata();
             ParseContext context = new ParseContext();
             context.set(Locale.class, Locale.US);
             ContentHandler handler = new BodyContentHandler();
             new OfficeParser().parse(input, handler, metadata, context);
-        
+
             assertEquals(
                     "application/vnd.ms-excel",
                     metadata.get(Metadata.CONTENT_TYPE));
-        
+
             String content = handler.toString();
-            
+
             // The first sheet has a pie chart
             assertContains("charttabyodawg", content);
             assertContains("WhamPuff", content);
-            
+
             // The second sheet has a bar chart and some text
             assertContains("Sheet1", content);
             assertContains("Test Excel Spreasheet", content);
@@ -236,7 +236,7 @@ public class ExcelParserTest {
             assertContains("fizzlepuff", content);
             assertContains("whyaxis", content);
             assertContains("eksaxis", content);
-            
+
             // The third sheet has some text
             assertContains("Sheet2", content);
             assertContains("dingdong", content);
@@ -265,7 +265,7 @@ public class ExcelParserTest {
             input.close();
         }
     }
-    
+
     @Test
     public void testWorksSpreadsheet70() throws Exception {
         InputStream input = ExcelParserTest.class.getResourceAsStream(
@@ -291,43 +291,43 @@ public class ExcelParserTest {
      */
     @Test
     public void testExcelXLSB() throws Exception {
-       Detector detector = new DefaultDetector();
-       AutoDetectParser parser = new AutoDetectParser();
-       
-       InputStream input = ExcelParserTest.class.getResourceAsStream(
-             "/test-documents/testEXCEL.xlsb");
-       Metadata m = new Metadata();
-       m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
-       
-       // Should be detected correctly
-       MediaType type = null;
-       try {
-          type = detector.detect(input, m);
-          assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
-       } finally {
-          input.close();
-       }
-       
-       // OfficeParser won't handle it
-       assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
-       
-       // OOXMLParser won't handle it
-       assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
-       
-       // AutoDetectParser doesn't break on it
-       input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb");
-
-       try {
-          ContentHandler handler = new BodyContentHandler(-1);
-          ParseContext context = new ParseContext();
-          context.set(Locale.class, Locale.US);
-          parser.parse(input, handler, m, context);
-
-          String content = handler.toString();
-          assertEquals("", content);
-       } finally {
-          input.close();
-       }
+        Detector detector = new DefaultDetector();
+        AutoDetectParser parser = new AutoDetectParser();
+
+        InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL.xlsb");
+        Metadata m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel.xlsb");
+
+        // Should be detected correctly
+        MediaType type = null;
+        try {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", type.toString());
+        } finally {
+            input.close();
+        }
+
+        // OfficeParser won't handle it
+        assertEquals(false, (new OfficeParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // OOXMLParser won't handle it
+        assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
+
+        // AutoDetectParser doesn't break on it
+        input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL.xlsb");
+
+        try {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            parser.parse(input, handler, m, context);
+
+            String content = handler.toString();
+            assertEquals("", content);
+        } finally {
+            input.close();
+        }
     }
 
     /**
@@ -335,32 +335,32 @@ public class ExcelParserTest {
      */
     @Test
     public void testExcel95() throws Exception {
-       Detector detector = new DefaultDetector();
-       AutoDetectParser parser = new AutoDetectParser();
-       InputStream input;
-       MediaType type;
-       Metadata m;
-       
-       // First try detection of Excel 5
-       m = new Metadata();
-       m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
-       input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls");
-       try {
-           type = detector.detect(input, m);
-           assertEquals("application/vnd.ms-excel", type.toString());
-        } finally {
-           input.close();
-        }
-       
-       // Now Excel 95
-       m = new Metadata();
-       m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
-       input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls");
-       try {
-           type = detector.detect(input, m);
-           assertEquals("application/vnd.ms-excel", type.toString());
+        Detector detector = new DefaultDetector();
+        AutoDetectParser parser = new AutoDetectParser();
+        InputStream input;
+        MediaType type;
+        Metadata m;
+
+        // First try detection of Excel 5
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel_5.xls");
+        input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls");
+        try {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel", type.toString());
+        } finally {
+            input.close();
+        }
+
+        // Now Excel 95
+        m = new Metadata();
+        m.add(Metadata.RESOURCE_NAME_KEY, "excel_95.xls");
+        input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls");
+        try {
+            type = detector.detect(input, m);
+            assertEquals("application/vnd.ms-excel", type.toString());
         } finally {
-           input.close();
+            input.close();
         }
 
         // OfficeParser can handle it
@@ -368,8 +368,8 @@ public class ExcelParserTest {
 
         // OOXMLParser won't handle it
         assertEquals(false, (new OOXMLParser()).getSupportedTypes(new ParseContext()).contains(type));
-       
-        
+
+
         // Parse the Excel 5 file
         m = new Metadata();
         input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_5.xls");
@@ -380,26 +380,26 @@ public class ExcelParserTest {
             parser.parse(input, handler, m, context);
 
             String content = handler.toString();
-            
+
             // Sheet names
             assertContains("Feuil1", content);
             assertContains("Feuil3", content);
-            
+
             // Text
             assertContains("Sample Excel", content);
             assertContains("Number", content);
-            
+
             // Numbers
             assertContains("15", content);
             assertContains("225", content);
-            
+
             // Metadata was also fetched
             assertEquals("Simple Excel document", m.get(TikaCoreProperties.TITLE));
             assertEquals("Keith Bennett", m.get(TikaCoreProperties.CREATOR));
         } finally {
             input.close();
         }
-        
+
         // Parse the Excel 95 file
         m = new Metadata();
         input = ExcelParserTest.class.getResourceAsStream("/test-documents/testEXCEL_95.xls");
@@ -410,12 +410,12 @@ public class ExcelParserTest {
             parser.parse(input, handler, m, context);
 
             String content = handler.toString();
-            
+
             // Sheet name
             assertContains("Foglio1", content);
-            
+
             // Very boring file, no actual text or numbers!
-            
+
             // Metadata was also fetched
             assertEquals(null, m.get(TikaCoreProperties.TITLE));
             assertEquals("Marco Quaranta", m.get(Office.LAST_AUTHOR));
@@ -423,35 +423,35 @@ public class ExcelParserTest {
             input.close();
         }
     }
-    
+
     /**
      * Ensures that custom OLE2 (HPSF) properties are extracted
      */
     @Test
     public void testCustomProperties() throws Exception {
-       InputStream input = ExcelParserTest.class.getResourceAsStream(
-             "/test-documents/testEXCEL_custom_props.xls");
-       Metadata metadata = new Metadata();
-       
-       try {
-          ContentHandler handler = new BodyContentHandler(-1);
-          ParseContext context = new ParseContext();
-          context.set(Locale.class, Locale.US);
-          new OfficeParser().parse(input, handler, metadata, context);
-       } finally {
-          input.close();
-       }
-       
-       assertEquals("application/vnd.ms-excel", metadata.get(Metadata.CONTENT_TYPE));
-       assertEquals("",                     metadata.get(TikaCoreProperties.CREATOR));
-       assertEquals("",                     metadata.get(TikaCoreProperties.MODIFIER));
-       assertEquals("2011-08-22T13:45:54Z", metadata.get(TikaCoreProperties.MODIFIED));
-       assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
-       assertEquals("Microsoft Excel",      metadata.get(OfficeOpenXMLExtended.APPLICATION));
-       assertEquals("true",                 metadata.get("custom:myCustomBoolean"));
-       assertEquals("3",                    metadata.get("custom:myCustomNumber"));
-       assertEquals("MyStringValue",        metadata.get("custom:MyCustomString"));
-       assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
-       assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
+        InputStream input = ExcelParserTest.class.getResourceAsStream(
+                "/test-documents/testEXCEL_custom_props.xls");
+        Metadata metadata = new Metadata();
+
+        try {
+            ContentHandler handler = new BodyContentHandler(-1);
+            ParseContext context = new ParseContext();
+            context.set(Locale.class, Locale.US);
+            new OfficeParser().parse(input, handler, metadata, context);
+        } finally {
+            input.close();
+        }
+
+        assertEquals("application/vnd.ms-excel", metadata.get(Metadata.CONTENT_TYPE));
+        assertEquals("", metadata.get(TikaCoreProperties.CREATOR));
+        assertEquals("", metadata.get(TikaCoreProperties.MODIFIER));
+        assertEquals("2011-08-22T13:45:54Z", metadata.get(TikaCoreProperties.MODIFIED));
+        assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
+        assertEquals("Microsoft Excel", metadata.get(OfficeOpenXMLExtended.APPLICATION));
+        assertEquals("true", metadata.get("custom:myCustomBoolean"));
+        assertEquals("3", metadata.get("custom:myCustomNumber"));
+        assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
+        assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
+        assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
     }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OfficeParserTest.java Fri May 29 14:36:21 2015
@@ -28,20 +28,19 @@ import org.apache.tika.parser.microsoft.
 import org.junit.Test;
 
 
-
 public class OfficeParserTest extends TikaTest {
 
-  @Test
-  public void parseOfficeWord() throws Exception {
-    Metadata metadata = new Metadata();
-    Parser parser = new OfficeParser();
-
-    String xml = getXML(getTestDocument("test.doc"), parser, metadata).xml;
-
-    assertTrue(xml.contains("test"));
-  }
-
-  private InputStream getTestDocument(String name) {
-    return TikaInputStream.get(OOXMLParserTest.class.getResourceAsStream("/test-documents/" + name));
-}
+    @Test
+    public void parseOfficeWord() throws Exception {
+        Metadata metadata = new Metadata();
+        Parser parser = new OfficeParser();
+
+        String xml = getXML(getTestDocument("test.doc"), parser, metadata).xml;
+
+        assertTrue(xml.contains("test"));
+    }
+
+    private InputStream getTestDocument(String name) {
+        return TikaInputStream.get(OOXMLParserTest.class.getResourceAsStream("/test-documents/" + name));
+    }
 }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OldExcelParserTest.java Fri May 29 14:36:21 2015
@@ -65,12 +65,12 @@ public class OldExcelParserTest extends
 
         // We can get the content type
         assertEquals("application/vnd.ms-excel.sheet.4", metadata.get(Metadata.CONTENT_TYPE));
-        
+
         // But no other metadata
         assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
         assertEquals(null, metadata.get(Metadata.SUBJECT));
     }
-    
+
     /**
      * Check we can get the plain text properly
      */
@@ -85,9 +85,9 @@ public class OldExcelParserTest extends
         } finally {
             stream.close();
         }
-        
+
         String text = handler.toString();
-        
+
         // Check we find a few words we expect in there
         assertContains("Size", text);
         assertContains("Returns", text);
@@ -104,15 +104,15 @@ public class OldExcelParserTest extends
     public void testHTML() throws Exception {
         XMLResult result = getXML(file);
         String xml = result.xml;
-        
+
         // Sheet name not found - only 5+ have sheet names
         assertNotContained("<p>Sheet 1</p>", xml);
-        
+
         // String cells
         assertContains("<p>Table 10 -", xml);
         assertContains("<p>Tax</p>", xml);
         assertContains("<p>N/A</p>", xml);
-        
+
         // Number cells
         assertContains("<p>(1)</p>", xml);
         assertContains("<p>5.0</p>", xml);

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java?rev=1682489&r1=1682488&r2=1682489&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java Fri May 29 14:36:21 2015
@@ -21,15 +21,14 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import java.io.InputStream;
-import java.io.StringWriter;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.sax.SAXTransformerFactory;
 import javax.xml.transform.sax.TransformerHandler;
 import javax.xml.transform.stream.StreamResult;
+import java.io.InputStream;
+import java.io.StringWriter;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -74,7 +73,7 @@ public class OutlookParserTest {
         assertEquals(
                 "L'\u00C9quipe Microsoft Outlook Express",
                 metadata.get(Metadata.AUTHOR));
-        
+
         // Stored as Thu, 5 Apr 2007 09:26:06 -0700
         assertEquals(
                 "2007-04-05T16:26:06Z",
@@ -118,7 +117,7 @@ public class OutlookParserTest {
     }
 
     /**
-     * Test case for TIKA-395, to ensure parser works for new Outlook formats. 
+     * Test case for TIKA-395, to ensure parser works for new Outlook formats.
      *
      * @see <a href="https://issues.apache.org/jira/browse/TIKA-395">TIKA-395</a>
      */
@@ -148,29 +147,29 @@ public class OutlookParserTest {
         assertContains("Streamlined Mail Experience", content);
         assertContains("Navigation Pane", content);
     }
-     
+
     @Test
     public void testOutlookHTMLVersion() throws Exception {
         Parser parser = new AutoDetectParser();
         Metadata metadata = new Metadata();
-       
+
         // Check the HTML version
         StringWriter sw = new StringWriter();
         SAXTransformerFactory factory = (SAXTransformerFactory)
-                 SAXTransformerFactory.newInstance();
+                SAXTransformerFactory.newInstance();
         TransformerHandler handler = factory.newTransformerHandler();
         handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
         handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
         handler.setResult(new StreamResult(sw));
 
         InputStream stream = OutlookParserTest.class.getResourceAsStream(
-               "/test-documents/testMSG_chinese.msg");
+                "/test-documents/testMSG_chinese.msg");
         try {
-           parser.parse(stream, handler, metadata, new ParseContext());
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
-           stream.close();
+            stream.close();
         }
-         
+
         // As the HTML version should have been processed, ensure
         //  we got some of the links
         String content = sw.toString();
@@ -178,7 +177,7 @@ public class OutlookParserTest {
         assertContains("<p>Alfresco MSG format testing", content);
         assertContains("<li>1", content);
         assertContains("<li>2", content);
-        
+
         // Make sure we don't have nested html docs
         assertEquals(2, content.split("<body>").length);
         assertEquals(2, content.split("<\\/body>").length);
@@ -188,39 +187,39 @@ public class OutlookParserTest {
     public void testOutlookForwarded() throws Exception {
         Parser parser = new AutoDetectParser();
         Metadata metadata = new Metadata();
-       
+
         // Check the HTML version
         StringWriter sw = new StringWriter();
         SAXTransformerFactory factory = (SAXTransformerFactory)
-                 SAXTransformerFactory.newInstance();
+                SAXTransformerFactory.newInstance();
         TransformerHandler handler = factory.newTransformerHandler();
         handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
         handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
         handler.setResult(new StreamResult(sw));
 
         InputStream stream = OutlookParserTest.class.getResourceAsStream(
-               "/test-documents/testMSG_forwarded.msg");
+                "/test-documents/testMSG_forwarded.msg");
         try {
-           parser.parse(stream, handler, metadata, new ParseContext());
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
-           stream.close();
+            stream.close();
         }
-         
+
         // Make sure we don't have nested docs
         String content = sw.toString();
         assertEquals(2, content.split("<body>").length);
         assertEquals(2, content.split("<\\/body>").length);
     }
-    
+
     @Test
     public void testOutlookHTMLfromRTF() throws Exception {
         Parser parser = new AutoDetectParser();
         Metadata metadata = new Metadata();
-       
+
         // Check the HTML version
         StringWriter sw = new StringWriter();
         SAXTransformerFactory factory = (SAXTransformerFactory)
-                 SAXTransformerFactory.newInstance();
+                SAXTransformerFactory.newInstance();
         TransformerHandler handler = factory.newTransformerHandler();
         handler.getTransformer().setOutputProperty(OutputKeys.METHOD, "xml");
         handler.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
@@ -229,24 +228,24 @@ public class OutlookParserTest {
         InputStream stream = OutlookParserTest.class.getResourceAsStream(
                 "/test-documents/test-outlook2003.msg");
         try {
-           parser.parse(stream, handler, metadata, new ParseContext());
+            parser.parse(stream, handler, metadata, new ParseContext());
         } finally {
-           stream.close();
+            stream.close();
         }
-         
+
         // As the HTML version should have been processed, ensure
         //  we got some of the links
-        String content = sw.toString().replaceAll("<p>\\s+","<p>");
+        String content = sw.toString().replaceAll("<p>\\s+", "<p>");
         assertContains("<dd>New Outlook User</dd>", content);
         assertContains("designed <i>to help you", content);
         assertContains("<p><a href=\"http://r.office.microsoft.com/r/rlidOutlookWelcomeMail10?clid=1033\">Cached Exchange Mode</a>", content);
-        
+
         // Link - check text around it, and the link itself
         assertContains("sign up for a free subscription", content);
         assertContains("Office Newsletter", content);
         assertContains("newsletter will be sent to you", content);
         assertContains("http://r.office.microsoft.com/r/rlidNewsletterSignUp?clid=1033", content);
-        
+
         // Make sure we don't have nested html docs
         assertEquals(2, content.split("<body>").length);
         assertEquals(2, content.split("<\\/body>").length);