You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2007/09/24 16:44:18 UTC

svn commit: r578843 - in /incubator/tika/trunk: ./ src/main/java/org/apache/tika/parser/ src/main/java/org/apache/tika/parser/html/ src/main/java/org/apache/tika/utils/ src/main/resources/mime/ src/test/java/org/apache/tika/ src/test/java/org/apache/ti...

Author: mattmann
Date: Mon Sep 24 07:44:18 2007
New Revision: 578843

URL: http://svn.apache.org/viewvc?rev=578843&view=rev
Log:
- fix for TIKA-17

Added:
    incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java
Removed:
    incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
    incubator/tika/trunk/src/test/java/org/apache/tika/utils/
Modified:
    incubator/tika/trunk/CHANGES.txt
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java
    incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
    incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java

Modified: incubator/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/CHANGES.txt?rev=578843&r1=578842&r2=578843&view=diff
==============================================================================
--- incubator/tika/trunk/CHANGES.txt (original)
+++ incubator/tika/trunk/CHANGES.txt Mon Sep 24 07:44:18 2007
@@ -32,3 +32,6 @@
 14. TIKA-6 - Port Nutch (or better) MimeType detection system into Tika (J. Charron & mattmann)
 
 15. TIKA-25 - Removed hardcoded reference to C:\oo.xml in OpenOfficeParser (K. Bennett & jukka)
+
+16. TIKA-17 - Need to support URLs for input resources. (K. Bennett & mattmann)
+

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java?rev=578843&r1=578842&r2=578843&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java Mon Sep 24 07:44:18 2007
@@ -16,17 +16,13 @@
  */
 package org.apache.tika.parser;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
+import java.io.InputStream;
 
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
 import org.apache.tika.config.LiusConfig;
 import org.apache.tika.config.ParserConfig;
 import org.apache.tika.exception.LiusException;
-import org.apache.tika.utils.MimeTypesUtils;
-
-import org.apache.log4j.Logger;
-import org.jdom.JDOMException;
 
 /**
  * Factory class. Build parser from xml config file.
@@ -37,77 +33,80 @@
 
     static Logger logger = Logger.getRootLogger();
 
-    /**
-     * Build parser from file and Lius config object
-     */
-    public static Parser getParser(File file, LiusConfig tc)
-            throws IOException, LiusException {
-        if(!file.canRead()) {
-          throw new IOException("Cannot read input file " + file.getAbsoluteFile());
+
+
+    public static Parser getParser(
+            InputStream inputStream, String mimeType, LiusConfig tc)
+            throws LiusException {
+
+        // Verify that all passed parameters are (probably) valid.
+
+        if (StringUtils.isBlank(mimeType)) {
+            throw new LiusException("Mime type not specified.");
         }
-        String mimeType = MimeTypesUtils.getMimeType(file);
-        ParserConfig pc = tc.getParserConfig(mimeType);
-        if(pc==null) {
-          throw new LiusException(
-              "No ParserConfig available for mime-type '" + mimeType + "'"
-              + " for file " + file.getName()
-          );
+
+        if (inputStream == null) {
+            throw new LiusException("Input stream is null.");
+        }
+
+        if (tc == null) {
+            throw new LiusException("Configuration object is null.");
         }
+
+        ParserConfig pc = getParserConfig(mimeType, tc);
+        if (pc == null) {
+            throw new LiusException(
+                    "Could not find parser config for mime type "
+                    + mimeType + ".");
+        }
+
         String className = pc.getParserClass();
         Parser parser = null;
-        Class<?> parserClass = null;
-        if (className != null) {
-            try {
-                logger.debug(
-                    "Loading parser class = " + className
-                    + " MimeType = " + mimeType
-                    + " for file " + file.getName()
-                );
-
-                parserClass = Class.forName(className);
-                parser = (Parser) parserClass.newInstance();
-
-            } catch (ClassNotFoundException e) {
-                logger.error(e.getMessage());
-
-            } catch (InstantiationException e) {
-                logger.error(e.getMessage());
-            } catch (IllegalAccessException e) {
-                logger.error(e.getMessage());
-            }
+
+        if (StringUtils.isBlank(className)) {
+            throw new LiusException(
+                    "Parser class name missing from ParserConfig.");
+        }
+
+        try {
+            logger.info("Loading parser class = " + className
+                    + " MimeType = " + mimeType);
+
+            Class<?> parserClass = Class.forName(className);
+            parser = (Parser) parserClass.newInstance();
             parser.setMimeType(mimeType);
-            parser.setNamespace(pc.getNameSpace());
             parser.setContents(pc.getContents());
-            parser.setInputStream(new FileInputStream(file));
+            parser.setInputStream(inputStream);
+
+        } catch (ClassNotFoundException e) {
+            logger.error(e.getMessage());
+            throw new LiusException(e.getMessage());
+        } catch (InstantiationException e) {
+            logger.error(e.getMessage());
+            throw new LiusException(e.getMessage());
+        } catch (IllegalAccessException e) {
+            logger.error(e.getMessage());
+            throw new LiusException(e.getMessage());
         }
 
         return parser;
     }
 
-    /**
-     * Build parser from string file path and Lius config object
-     */
-    public static Parser getParser(String str, LiusConfig tc)
-            throws IOException, LiusException {
-        return getParser(new File(str), tc);
-    }
 
-    /**
-     * Build parser from string file path and Lius config file path
-     */
-    public static Parser getParser(String str, String tcPath)
-            throws IOException, LiusException, JDOMException {
-        LiusConfig tc = LiusConfig.getInstance(tcPath);
-        return getParser(new File(str), tc);
-    }
+    private static ParserConfig getParserConfig(String mimeType, LiusConfig tc)
+            throws LiusException {
 
-    /**
-     * Build parser from file and Lius config file path
-     */
-    public static Parser getParser(File file, String tcPath)
-            throws IOException, LiusException, JDOMException {
-        LiusConfig tc = LiusConfig.getInstance(tcPath);
-        return getParser(file, tc);
-    }
+        ParserConfig pc = tc.getParserConfig(mimeType);
+
+        if (pc == null) {
+            String message =
+                    "Could not find parser configuration for mime type "
+                    + mimeType + ".";
 
+            logger.error(message);
+            throw new LiusException(message);
+        }
+
+        return pc;
+    }
 }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=578843&r1=578842&r2=578843&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/html/HtmlParser.java Mon Sep 24 07:44:18 2007
@@ -17,17 +17,17 @@
 package org.apache.tika.parser.html;
 
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.log4j.Logger;
+import org.apache.oro.text.regex.MalformedPatternException;
 import org.apache.tika.config.Content;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.utils.RegexUtils;
-
-import org.apache.log4j.Logger;
-import org.apache.oro.text.regex.MalformedPatternException;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
@@ -61,6 +61,12 @@
         }
         List<Content> ctt = super.getContents();
         contentsMap = new HashMap<String, Content>();
+
+
+        if (ctt == null) {
+            return new ArrayList<Content>(0);
+        }
+
         Iterator i = ctt.iterator();
         while (i.hasNext()) {
             Content ct = (Content) i.next();

Added: incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java?rev=578843&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/ParseUtils.java Mon Sep 24 07:44:18 2007
@@ -0,0 +1,277 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.utils;
+
+// JDK imports
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+// TIKA imports
+import org.apache.tika.config.LiusConfig;
+import org.apache.tika.exception.LiusException;
+import org.apache.tika.metadata.TikaMimeKeys;
+import org.apache.tika.mime.MimeUtils;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserFactory;
+
+/**
+ * Contains utility methods for parsing documents. Intended to provide simple
+ * entry points into the Tika framework.
+ */
+public class ParseUtils implements TikaMimeKeys {
+
+    private static final Configuration conf = new Configuration();
+
+    static {
+        conf.set(TIKA_MIME_FILE, "org/apache/tika/mime/tika-mimetypes.xml");
+    }
+
+    private static final MimeUtils mimeUtils = new MimeUtils(conf);
+
+    /**
+     * Returns a parser that can handle the specified MIME type, and is set to
+     * receive input from the specified input stream. NB: Close the input
+     * stream when it is no longer needed!
+     * 
+     * @param inputStream
+     *            stream containing document data to parse
+     * @param config
+     * @param mimeType
+     *            the document's MIME type
+     * @return a parser appropriate to this MIME type and ready to read input
+     *         from the specified document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static Parser getParser(InputStream inputStream, LiusConfig config,
+            String mimeType) throws LiusException, IOException {
+
+        if (inputStream == null) {
+            throw new LiusException("Document input stream not provided.");
+        }
+
+        return ParserFactory.getParser(inputStream, mimeType, config);
+    }
+
+    // Note that we cannot provide a method that takes an InputStream
+    // but not a MIME type, since we will not have a resource
+    // name from which to derive it.
+
+    /**
+     * Returns a parser that can handle the specified MIME type, and is set to
+     * receive input from a stream opened from the specified URL. NB: Close the
+     * input stream when it is no longer needed!
+     * 
+     * @param documentUrl
+     *            URL pointing to the document to parse
+     * @param config
+     * @param mimeType
+     *            the document's MIME type
+     * @return a parser appropriate to this MIME type and ready to read input
+     *         from the specified document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static Parser getParser(URL documentUrl, LiusConfig config,
+            String mimeType) throws LiusException, IOException {
+
+        if (documentUrl == null) {
+            throw new LiusException("Document URL not provided.");
+        }
+
+        return ParserFactory.getParser(documentUrl.openStream(), mimeType,
+                config);
+    }
+
+    /**
+     * Returns a parser that can handle the specified MIME type, and is set to
+     * receive input from a stream opened from the specified URL. The MIME type
+     * is determined automatically. NB: Close the input stream when it is no
+     * longer needed!
+     * 
+     * @param documentUrl
+     *            URL pointing to the document to parse
+     * @param config
+     * @return a parser appropriate to this MIME type and ready to read input
+     *         from the specified document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static Parser getParser(URL documentUrl, LiusConfig config)
+            throws LiusException, IOException {
+
+        String mimetype = mimeUtils.getRepository().getMimeType(documentUrl)
+                .getName();
+        return getParser(documentUrl, config, mimetype);
+    }
+
+    /**
+     * Returns a parser that can handle the specified MIME type, and is set to
+     * receive input from a stream opened from the specified file. NB: Close the
+     * input stream when it is no longer needed!
+     * 
+     * @param documentFile
+     *            File object pointing to the document to parse
+     * @param config
+     * @param mimeType
+     *            the document's MIME type
+     * @return a parser appropriate to this MIME type and ready to read input
+     *         from the specified document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static Parser getParser(File documentFile, LiusConfig config,
+            String mimeType) throws LiusException, IOException {
+
+        if (documentFile == null) {
+            throw new LiusException("Document file not provided.");
+        }
+
+        if (!documentFile.canRead()) {
+            throw new LiusException(
+                    "Document file does not exist or is not readable.");
+        }
+
+        FileInputStream inputStream = new FileInputStream(documentFile);
+        // TODO: Do we want to wrap a BufferedInputStream, or does the
+        // file's buffering suffice?
+
+        return ParserFactory.getParser(inputStream, mimeType, config);
+    }
+
+    /**
+     * Returns a parser for the specified file, set to receive input from a
+     * stream opened from that file. The MIME type is determined automatically.
+     * NB: Close the input stream when it is no longer needed!
+     * 
+     * @param documentFile
+     *            File object pointing to the document to parse
+     * @param config
+     * @return a parser appropriate to this MIME type and ready to read input
+     *         from the specified document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static Parser getParser(File documentFile, LiusConfig config)
+            throws LiusException, IOException {
+
+        String mimetype = mimeUtils.getRepository().getMimeType(documentFile)
+                .getName();
+        return getParser(documentFile, config, mimetype);
+    }
+
+    /**
+     * Gets the string content of a document read from an input stream.
+     * 
+     * @param inputStream
+     *            the stream from which to read document data
+     * @param config
+     * @param mimeType
+     *            MIME type of the data
+     * @return the string content parsed from the document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static String getStringContent(InputStream inputStream,
+            LiusConfig config, String mimeType) throws LiusException,
+            IOException {
+
+        Parser parser = getParser(inputStream, config, mimeType);
+        return getStringContent(parser);
+    }
+
+    /**
+     * Gets the string content of a document read from a URL.
+     * 
+     * @param documentUrl
+     *            URL pointing to the document to parse
+     * @param config
+     * @return the string content parsed from the document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static String getStringContent(URL documentUrl, LiusConfig config)
+            throws LiusException, IOException {
+
+        Parser parser = getParser(documentUrl, config);
+        return getStringContent(parser);
+    }
+
+    /**
+     * Gets the string content of a document read from a URL.
+     * 
+     * @param documentUrl
+     *            URL pointing to the document to parse
+     * @param config
+     * @param mimeType
+     *            MIME type of the data
+     * @return the string content parsed from the document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static String getStringContent(URL documentUrl, LiusConfig config,
+            String mimeType) throws LiusException, IOException {
+
+        Parser parser = getParser(documentUrl, config, mimeType);
+        return getStringContent(parser);
+    }
+
+    /**
+     * Gets the string content of a document read from a file.
+     * 
+     * @param documentFile
+     *            File object pointing to the document to parse
+     * @param config
+     * @param mimeType
+     *            MIME type of the data
+     * @return the string content parsed from the document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static String getStringContent(File documentFile, LiusConfig config,
+            String mimeType) throws LiusException, IOException {
+
+        Parser parser = getParser(documentFile, config, mimeType);
+        return getStringContent(parser);
+    }
+
+    /**
+     * Gets the string content of a document read from a file.
+     * 
+     * @param documentFile
+     *            File object pointing to the document to parse
+     * @param config
+     * @return the string content parsed from the document
+     * @throws LiusException
+     * @throws IOException
+     */
+    public static String getStringContent(File documentFile, LiusConfig config)
+            throws LiusException, IOException {
+
+        Parser parser = getParser(documentFile, config);
+        return getStringContent(parser);
+    }
+
+    private static String getStringContent(Parser parser) throws IOException {
+        String content = parser.getStrContent();
+        parser.getInputStream().close();
+        return content;
+    }
+}

Modified: incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml?rev=578843&r1=578842&r2=578843&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml (original)
+++ incubator/tika/trunk/src/main/resources/mime/tika-mimetypes.xml Mon Sep 24 07:44:18 2007
@@ -156,6 +156,7 @@
 	</mime-type>
 
 	<mime-type type="application/rtf">
+	    <glob pattern="*.rtf"/>
 		<alias type="text/rtf" />
 	</mime-type>
 

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=578843&r1=578842&r2=578843&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Mon Sep 24 07:44:18 2007
@@ -18,19 +18,17 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.Collection;
-import java.util.StringTokenizer;
-
-import junit.framework.TestCase;
 
 import org.apache.tika.config.Content;
 import org.apache.tika.config.LiusConfig;
 import org.apache.tika.log.LiusLogger;
 import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.ParserFactory;
+import org.apache.tika.utils.ParseUtils;
 import org.apache.tika.utils.Utils;
 import org.jdom.JDOMException;
 
+import junit.framework.TestCase;
+
 /**
  * Junit test class   
  * @author Rida Benjelloun (ridabenjelloun@apache.org)  
@@ -67,58 +65,97 @@
         LiusLogger.setLoggerConfigFile(log4jPropertiesFilename);
 
     }
-    
-    /*
-     * public void testConfig(){ TikaConfig tc =
-     * TikaConfig.getInstance("C:\\tika\\config\\tikaConfig2.xml"); ParserConfig
-     * pc = tc.getParserConfig("text/html"); assertEquals("parse-html",
-     * pc.getName()); }
-     */
 
     public void testPDFExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testPDF.pdf"), tc);
+        File file = getTestFile("testPDF.pdf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/pdf");
+
+        Parser parser = ParseUtils.getParser(file, tc);
+        String s3 = parser.getStrContent();
+
+        assertEquals(s1, s2);
+        assertEquals(s1, s3);
     }
 
     public void testTXTExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testTXT.txt"), tc);
+        File file = getTestFile("testTXT.txt");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "text/plain");
+        assertEquals(s1, s2);
     }
 
     public void testRTFExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testRTF.rtf"), tc);
+        File file = getTestFile("testRTF.rtf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/rtf");
+        assertEquals(s1, s2);
     }
 
     public void testXMLExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testXML.xml"), tc);
+        File file = getTestFile("testXML.xml");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/xml");
+        assertEquals(s1, s2);
     }
 
     public void testPPTExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testPPT.ppt"), tc);
+        File file = getTestFile("testPPT.ppt");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc,
+                "application/vnd.ms-powerpoint");
+        assertEquals(s1, s2);
     }
 
     public void testWORDxtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testWORD.doc"), tc);
+        File file = getTestFile("testWORD.doc");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/msword");
+        assertEquals(s1, s2);
     }
 
     public void testEXCELExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testEXCEL.xls"), tc);
+        File file = getTestFile("testEXCEL.xls");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc,
+                "application/vnd.ms-excel");
+        assertEquals(s1, s2);
     }
 
     public void testOOExtraction() throws Exception {
-        ParserFactory.getParser(getTestFile("testOpenOffice2.odt"), tc);
+        File file = getTestFile("testOpenOffice2.odt");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc,
+                "application/vnd.oasis.opendocument.text");
+        assertEquals(s1, s2);
     }
 
     public void testHTMLExtraction() throws Exception {
-        Parser parser = ParserFactory.getParser(getTestFile("testHTML.html"), tc);
-        assertEquals("Title : Test Indexation Html", (parser.getContent("title")).getValue());
-        assertEquals("text/html",parser.getMimeType());
-        final String text = Utils.toString(parser.getContents());
+        File file = getTestFile("testHTML.html");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "text/html");
+        assertEquals(s1, s2);
+
+        Parser parser = ParseUtils.getParser(file, tc);
+        assertNotNull(parser);
+        assertEquals("org.apache.tika.parser.html.HtmlParser", parser.getClass().getName());
+
         
+        Content content = parser.getContent("title");
+        assertNotNull(content);
+        assertEquals("Title : Test Indexation Html", content.getValue());
+
+        assertEquals("text/html", parser.getMimeType());
+
+        final String text = Utils.toString(parser.getContents());
         final String expected = "Test Indexation Html";
-        assertTrue("text contains '" + expected + "'",text.indexOf(expected) >= 0);
+        assertTrue("text contains '" + expected + "'",
+                text.contains(expected));
+        parser.getInputStream().close();
     }
 
     private File getTestFile(String filename) {
-      return new File(testFilesBaseDir,filename); 
+      return new File(testFilesBaseDir, filename);
     }
 
 }