You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bd...@apache.org on 2007/09/15 11:33:08 UTC

svn commit: r575896 - in /incubator/tika/trunk: ./ src/main/java/org/apache/tika/config/ src/main/java/org/apache/tika/parser/ src/main/java/org/apache/tika/utils/ src/test/java/org/apache/tika/ src/test/java/org/apache/tika/utils/ src/test/resources/ ...

Author: bdelacretaz
Date: Sat Sep 15 02:33:07 2007
New Revision: 575896

URL: http://svn.apache.org/viewvc?rev=575896&view=rev
Log:
TIKA-19: fix org.apache.tika.TestParsers, test more file types and improve exception handling in LiusConfig and ParserFactory. Includes fixes from TIKA-16 and TIKA-14 which were contributed by Keith R. Bennett, thanks!

Added:
    incubator/tika/trunk/src/test/resources/test-documents/
    incubator/tika/trunk/src/test/resources/test-documents/testEXCEL.xls   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testHTML.html
      - copied unchanged from r575888, incubator/tika/trunk/src/test/resources/testHTML.html
    incubator/tika/trunk/src/test/resources/test-documents/testOpenOffice2.odt   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testPDF.pdf   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testPPT.ppt   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testRTF.rtf
      - copied unchanged from r575888, incubator/tika/trunk/src/test/resources/testRTF.rtf
    incubator/tika/trunk/src/test/resources/test-documents/testTXT.txt
      - copied unchanged from r575888, incubator/tika/trunk/src/test/resources/testTXT.txt
    incubator/tika/trunk/src/test/resources/test-documents/testWORD.doc   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testXML.xml
      - copied, changed from r575888, incubator/tika/trunk/src/test/resources/testXML.xml
Removed:
    incubator/tika/trunk/src/test/resources/testHTML.html
    incubator/tika/trunk/src/test/resources/testRTF.rtf
    incubator/tika/trunk/src/test/resources/testTXT.txt
    incubator/tika/trunk/src/test/resources/testXML.xml
Modified:
    incubator/tika/trunk/   (props changed)
    incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
    incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
    incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java
    incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
    incubator/tika/trunk/src/test/java/org/apache/tika/utils/MimeTypesUtilsTest.java

Propchange: incubator/tika/trunk/
------------------------------------------------------------------------------
--- svn:ignore (original)
+++ svn:ignore Sat Sep 15 02:33:07 2007
@@ -2,3 +2,4 @@
 .project
 .settings
 .classpath
+lius.log

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java?rev=575896&r1=575895&r2=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/config/LiusConfig.java Sat Sep 15 02:33:07 2007
@@ -49,7 +49,7 @@
 
     private static String currentFile;
 
-    public static LiusConfig getInstance(String configFile) {
+    public static LiusConfig getInstance(String configFile) throws JDOMException,IOException {
 
         if (configsCache.containsKey(configFile)) {
             return (LiusConfig) configsCache.get(configFile);
@@ -86,15 +86,17 @@
         return pc;
     }
 
-    private static Document parse(String file) {
+    private static Document parse(String file) throws JDOMException,IOException {
         org.jdom.Document xmlDoc = new org.jdom.Document();
         try {
             SAXBuilder builder = new SAXBuilder();
             xmlDoc = builder.build(new File(file));
-        } catch (JDOMException e) {
-            logger.error(e.getMessage());
-        } catch (IOException e) {
-            logger.error(e.getMessage());
+        } catch (JDOMException jde) {
+            logger.error(jde.getMessage(),jde);
+            throw jde;
+        } catch(IOException ioe) {
+          logger.error(ioe.getMessage(),ioe);
+          throw ioe;
         }
         return xmlDoc;
 

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java?rev=575896&r1=575895&r2=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/ParserFactory.java Sat Sep 15 02:33:07 2007
@@ -26,6 +26,7 @@
 import org.apache.tika.utils.MimeTypesUtils;
 
 import org.apache.log4j.Logger;
+import org.jdom.JDOMException;
 
 /**
  * Factory class. Build parser from xml config file.
@@ -41,15 +42,27 @@
      */
     public static Parser getParser(File file, LiusConfig tc)
             throws IOException, LiusException {
+        if(!file.canRead()) {
+          throw new IOException("Cannot read input file " + file.getAbsoluteFile());
+        }
         String mimeType = MimeTypesUtils.getMimeType(file);
         ParserConfig pc = tc.getParserConfig(mimeType);
+        if(pc==null) {
+          throw new LiusException(
+              "No ParserConfig available for mime-type '" + mimeType + "'"
+              + " for file " + file.getName()
+          );
+        }
         String className = pc.getParserClass();
         Parser parser = null;
         Class<?> parserClass = null;
         if (className != null) {
             try {
-                logger.info("Loading parser class = " + className
-                        + " MimeType = " + mimeType);
+                logger.debug(
+                    "Loading parser class = " + className
+                    + " MimeType = " + mimeType
+                    + " for file " + file.getName()
+                );
 
                 parserClass = Class.forName(className);
                 parser = (Parser) parserClass.newInstance();
@@ -83,7 +96,7 @@
      * Build parser from string file path and Lius config file path
      */
     public static Parser getParser(String str, String tcPath)
-            throws IOException, LiusException {
+            throws IOException, LiusException, JDOMException {
         LiusConfig tc = LiusConfig.getInstance(tcPath);
         return getParser(new File(str), tc);
     }
@@ -92,7 +105,7 @@
      * Build parser from file and Lius config file path
      */
     public static Parser getParser(File file, String tcPath)
-            throws IOException, LiusException {
+            throws IOException, LiusException, JDOMException {
         LiusConfig tc = LiusConfig.getInstance(tcPath);
         return getParser(file, tc);
     }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java?rev=575896&r1=575895&r2=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/MimeTypesUtils.java Sat Sep 15 02:33:07 2007
@@ -54,7 +54,11 @@
             return "application/vnd.ms-excel";
         } else if (name.endsWith(".zip")) {
             return "application/zip";
-        } else {
+        } else if (name.endsWith(".rtf")) {
+          return "application/rtf";
+        } else if (name.endsWith(".odt")) {
+           return "application/vnd.oasis.opendocument.text";         
+         } else {
             return "application/octet-stream";
         }
     }

Modified: incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java?rev=575896&r1=575895&r2=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java (original)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/utils/Utils.java Sat Sep 15 02:33:07 2007
@@ -24,6 +24,10 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.io.Writer;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
@@ -31,9 +35,8 @@
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 
-import org.apache.tika.config.Content;
-
 import org.apache.log4j.Logger;
+import org.apache.tika.config.Content;
 import org.jdom.Document;
 import org.jdom.JDOMException;
 import org.jdom.input.SAXBuilder;
@@ -50,26 +53,36 @@
 
     static Logger logger = Logger.getRootLogger();
 
+    public static String toString(Collection<Content> structuredContent) {
+      final StringWriter sw = new StringWriter();
+      print(structuredContent,sw);
+      return sw.toString();
+    }
+    
     public static void print(Collection<Content> structuredContent) {
+      print(structuredContent,new OutputStreamWriter(System.out));
+    }
+    
+    public static void print(Collection<Content> structuredContent,Writer outputWriter) {
+        final PrintWriter output = new PrintWriter(outputWriter,true);
         for (Iterator<Content> iter = structuredContent.iterator(); iter
                 .hasNext();) {
             Content ct = iter.next();
             if (ct.getValue() != null) {
-                System.out.print(ct.getName() + ": ");
-                System.out.println(ct.getValue());
+                output.print(ct.getName() + ": ");
+                output.println(ct.getValue());
             } else if (ct.getValues() != null) {
 
-                System.out.print(ct.getName() + ": ");
+                output.print(ct.getName() + ": ");
                 for (int j = 0; j < ct.getValues().length; j++) {
                     if (j == 0)
-                        System.out.println(ct.getValues()[j]);
+                        output.println(ct.getValues()[j]);
                     else {
-                        System.out.println("\t" + ct.getValues()[j]);
+                        output.println("\t" + ct.getValues()[j]);
                     }
                 }
             }
         }
-
     }
 
     public static Document parse(InputStream is) {

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java?rev=575896&r1=575895&r2=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/TestParsers.java Sat Sep 15 02:33:07 2007
@@ -22,13 +22,14 @@
 import java.util.StringTokenizer;
 
 import junit.framework.TestCase;
+
 import org.apache.tika.config.Content;
 import org.apache.tika.config.LiusConfig;
-import org.apache.tika.exception.LiusException;
 import org.apache.tika.log.LiusLogger;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParserFactory;
 import org.apache.tika.utils.Utils;
+import org.jdom.JDOMException;
 
 /**
  * Junit test class   
@@ -37,12 +38,13 @@
 public class TestParsers extends TestCase {
 
     private LiusConfig tc;
+    private File testFilesBaseDir; 
 
-    private File classDir;
-
-    private String config;
-
-    public void setUp() {
+    public void setUp() throws JDOMException, IOException {
+        /* FIXME the old mechanism does not work anymore when running the tests
+         * with Maven - need a resource-based one, but this means more
+         * changes to classes which rely on filenames.
+         *  
         String sep = File.separator;
         StringTokenizer st = new StringTokenizer(System.getProperty(
                 "java.class.path"), File.pathSeparator);
@@ -53,13 +55,19 @@
 
         String log4j = classDir.getParent() + sep + "Config" + sep + "log4j"
                 + sep + "log4j.properties";
+         */ 
 
-        tc = LiusConfig.getInstance(config);
+        // FIXME for now, fix filenames according to Maven testing layout
+        final String liusConfigFilename = "target/classes/config.xml";
+        final String log4jPropertiesFilename = "target/classes/log4j/log4j.properties";
+        testFilesBaseDir = new File("src/test/resources/test-documents");
+        
+        tc = LiusConfig.getInstance(liusConfigFilename);
 
-        LiusLogger.setLoggerConfigFile(log4j);
+        LiusLogger.setLoggerConfigFile(log4jPropertiesFilename);
 
     }
-
+    
     /*
      * public void testConfig(){ TikaConfig tc =
      * TikaConfig.getInstance("C:\\tika\\config\\tikaConfig2.xml"); ParserConfig
@@ -67,149 +75,50 @@
      * pc.getName()); }
      */
 
-    public void testPDFExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testPDF.PDF");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testTXTExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testTXT.txt");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testRTFExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testRTF.rtf");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testXMLExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testXML.xml");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testPPTExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testPPT.ppt");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-            System.out.println(parser.getStrContent());
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testWORDxtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testWORD.doc");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-            System.out.println(parser.getStrContent());
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testEXCELExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testEXCEL.xls");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-            // System.out.println(parser.getStrContent());
-            printContentsInfo(parser);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-
-    }
-
-    public void testOOExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testOO2.odt");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-            // System.out.println(parser.getStrContent());
-            printContentsInfo(parser);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
-        
+    public void testPDFExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testPDF.pdf"), tc);
+    }
+
+    public void testTXTExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testTXT.txt"), tc);
+    }
+
+    public void testRTFExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testRTF.rtf"), tc);
     }
 
-    public void testHTMLExtraction() {
-        Parser parser = null;
-        File testFile = new File(classDir.getParent() + File.separator
-                + "testFiles" + File.separator + "testHTML.html");
-        try {
-            parser = ParserFactory.getParser(testFile, tc);
-            assertEquals("Title : Test Indexation Html", (parser.getContent("title")).getValue());
-            // System.out.println(parser.getStrContent());
-            printContentsInfo(parser);
-        } catch (IOException e) {
-            e.printStackTrace();
-        } catch (LiusException e) {
-            e.printStackTrace();
-        }
+    public void testXMLExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testXML.xml"), tc);
+    }
+
+    public void testPPTExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testPPT.ppt"), tc);
+    }
+
+    public void testWORDxtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testWORD.doc"), tc);
+    }
 
+    public void testEXCELExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testEXCEL.xls"), tc);
+    }
+
+    public void testOOExtraction() throws Exception {
+        ParserFactory.getParser(getTestFile("testOpenOffice2.odt"), tc);
+    }
+
+    public void testHTMLExtraction() throws Exception {
+        Parser parser = ParserFactory.getParser(getTestFile("testHTML.html"), tc);
+        assertEquals("Title : Test Indexation Html", (parser.getContent("title")).getValue());
+        assertEquals("text/html",parser.getMimeType());
+        final String text = Utils.toString(parser.getContents());
+        
+        final String expected = "Test Indexation Html";
+        assertTrue("text contains '" + expected + "'",text.indexOf(expected) >= 0);
     }
 
-    private void printContentsInfo(Parser parser) {
-        String mimeType = parser.getMimeType();
-        System.out.println("Mime : " + mimeType);
-        String strContent = parser.getStrContent();
-        Collection<Content> structuredContent = parser.getContents();
-        Utils.print(structuredContent);
-        System.out.println("==============");
-        // Content title = parser.getContent("title");
+    private File getTestFile(String filename) {
+      return new File(testFilesBaseDir,filename); 
     }
 
 }

Modified: incubator/tika/trunk/src/test/java/org/apache/tika/utils/MimeTypesUtilsTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/utils/MimeTypesUtilsTest.java?rev=575896&r1=575895&r2=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/utils/MimeTypesUtilsTest.java (original)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/utils/MimeTypesUtilsTest.java Sat Sep 15 02:33:07 2007
@@ -25,13 +25,22 @@
 public class MimeTypesUtilsTest extends TestCase {
 
     public void test() throws MalformedURLException {
-        String s =                          "x.pdf";
         URL u = new URL("http://mydomain.com/x.pdf?x=y");
-        File f = new File(           "/a/b/c/x.pdf");
+        File f = new File("/a/b/c/x.pdf");
 
-        assertEquals("application/pdf", MimeTypesUtils.getMimeType(s));
-        assertEquals("application/pdf", MimeTypesUtils.getMimeType(u));
-        assertEquals("application/pdf", MimeTypesUtils.getMimeType(f));
+        assertEquals("application/pdf",MimeTypesUtils.getMimeType("x.pdf"));
+        assertEquals("application/pdf",MimeTypesUtils.getMimeType(u));
+        assertEquals("application/pdf",MimeTypesUtils.getMimeType(f));
+        assertEquals("text/plain",MimeTypesUtils.getMimeType("x.txt"));
+        assertEquals("text/html",MimeTypesUtils.getMimeType("x.htm"));
+        assertEquals("text/html",MimeTypesUtils.getMimeType("x.html"));
+        assertEquals("application/xhtml+xml",MimeTypesUtils.getMimeType("x.xhtml"));
+        assertEquals("application/xml",MimeTypesUtils.getMimeType("x.xml"));
+        assertEquals("application/msword",MimeTypesUtils.getMimeType("x.doc"));
+        assertEquals("application/vnd.ms-powerpoint",MimeTypesUtils.getMimeType("x.ppt"));
+        assertEquals("application/vnd.ms-excel",MimeTypesUtils.getMimeType("x.xls"));
+        assertEquals("application/zip",MimeTypesUtils.getMimeType("x.zip"));
+        assertEquals("application/vnd.oasis.opendocument.text",MimeTypesUtils.getMimeType("x.odt"));
+        assertEquals("application/octet-stream",MimeTypesUtils.getMimeType("x.xyz"));
     }
-
 }

Added: incubator/tika/trunk/src/test/resources/test-documents/testEXCEL.xls
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testEXCEL.xls?rev=575896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testEXCEL.xls
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testOpenOffice2.odt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testOpenOffice2.odt?rev=575896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testOpenOffice2.odt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testPDF.pdf
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testPDF.pdf?rev=575896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testPDF.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/pdf

Added: incubator/tika/trunk/src/test/resources/test-documents/testPPT.ppt
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testPPT.ppt?rev=575896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testPPT.ppt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testWORD.doc
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testWORD.doc?rev=575896&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testWORD.doc
------------------------------------------------------------------------------
    svn:mime-type = application/msword

Copied: incubator/tika/trunk/src/test/resources/test-documents/testXML.xml (from r575888, incubator/tika/trunk/src/test/resources/testXML.xml)
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testXML.xml?p2=incubator/tika/trunk/src/test/resources/test-documents/testXML.xml&p1=incubator/tika/trunk/src/test/resources/testXML.xml&r1=575888&r2=575896&rev=575896&view=diff
==============================================================================
--- incubator/tika/trunk/src/test/resources/testXML.xml (original)
+++ incubator/tika/trunk/src/test/resources/test-documents/testXML.xml Sat Sep 15 02:33:07 2007
@@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
 <oaidc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/">
 
 	<dc:title>Archimède et Lius</dc:title>