You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2007/11/18 23:21:13 UTC

svn commit: r596146 [23/36] - in /incubator/tika/site: ./ apidocs/ apidocs/org/ apidocs/org/apache/ apidocs/org/apache/tika/ apidocs/org/apache/tika/config/ apidocs/org/apache/tika/config/class-use/ apidocs/org/apache/tika/exception/ apidocs/org/apache...

Added: incubator/tika/site/rat-report.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/rat-report.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/rat-report.html (added)
+++ incubator/tika/site/rat-report.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,638 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+<html>
+  <head>
+    <title>Apache Tika - RAT (Release Audit Tool) results</title>
+    <style type="text/css" media="all">
+      @import url("./css/maven-base.css");
+      @import url("./css/maven-theme.css");
+      @import url("./css/site.css");
+    </style>
+    <link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
+        <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+      </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="tika.png" alt="Apache Tika" />
+    
+            </a>
+                        <a href="../" id="bannerRight">
+    
+                                    <img src="http://incubator.apache.org/images/apache-incubator-logo.png" alt="Apache Incubator" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+  
+  
+              <div class="xright">      <a href="http://www.apache.org/">Apache</a>
+          |
+          <a href="../">Incubator</a>
+          |
+          <a href="http://lucene.apache.org/">Lucene</a>
+          
+  
+
+  
+    
+  
+  
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+  
+  
+                   <h5>Apache Tika</h5>
+        <ul>
+              
+    <li class="none">
+              <a href="index.html">Introduction</a>
+        </li>
+          </ul>
+          <h5>Project Documentation</h5>
+        <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+              <a href="project-info.html">Project Information</a>
+              </li>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+            
+            
+      
+            
+      
+              
+            <li class="expanded">
+              <a href="project-reports.html">Project Reports</a>
+                <ul>
+                  
+    <li class="none">
+              <a href="checkstyle.html">Checkstyle</a>
+        </li>
+                  
+    <li class="none">
+              <a href="findbugs.html">FindBugs Report</a>
+        </li>
+                  
+    <li class="none">
+              <a href="apidocs/index.html">JavaDocs</a>
+        </li>
+                  
+    <li class="none">
+              <a href="surefire-report.html">Maven Surefire Report</a>
+        </li>
+                  
+    <li class="none">
+              <strong>RAT Report</strong>
+        </li>
+                  
+    <li class="none">
+              <a href="xref/index.html">Source Xref</a>
+        </li>
+                  
+    <li class="none">
+              <a href="xref-test/index.html">Test Source Xref</a>
+        </li>
+              </ul>
+        </li>
+          </ul>
+                                       <a href="http://maven.apache.org/" title="Built by Maven" id="poweredBy">
+            <img alt="Built by Maven" src="./images/logos/maven-feather.png" />
+          </a>
+                       
+  
+
+  
+    
+  
+  
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <div class="section"><h2>RAT (Release Audit Tool) results</h2><p>The following document contains the results of <a href="http://code.google.com/p/arat/">RAT (Release Audit Tool)</a>.</p><p><div class="source"><pre>
+*****************************************************
+Summary
+-------
+Notes: 3
+Binaries: 7
+Archives: 1
+Standards: 88
+
+Apache Licensed: 77
+Generated Documents: 0
+
+JavaDocs are generated and so license header is optional
+Generated files do not require license headers
+
+11 Unknown Licenses
+
+*******************************
+
+Archives (+ indicates readable, $ unreadable): 
+
+ + src/test/resources/test-documents/test-documents.zip
+ 
+*****************************************************
+  Files with AL headers will be marked AL
+  Binary files (which do not require AL headers) will be marked B
+  Compressed archives will be marked A
+  Notices, licenses etc will be marked N
+  AL    bin.xml
+ !????? CHANGES.txt
+  N     LICENSE.txt
+  N     NOTICE.txt
+  AL    pom.xml
+  N     README.txt
+  AL    src/main/java/org/apache/tika/config/TikaConfig.java
+  AL    src/main/java/org/apache/tika/exception/TikaException.java
+  AL    src/main/java/org/apache/tika/metadata/CreativeCommons.java
+  AL    src/main/java/org/apache/tika/metadata/DublinCore.java
+  AL    src/main/java/org/apache/tika/metadata/HttpHeaders.java
+  AL    src/main/java/org/apache/tika/metadata/Metadata.java
+  AL    src/main/java/org/apache/tika/metadata/MSOffice.java
+ !????? src/main/java/org/apache/tika/metadata/package.html
+  AL    src/main/java/org/apache/tika/metadata/SpellCheckedMetadata.java
+  AL    src/main/java/org/apache/tika/metadata/TikaMetadataKeys.java
+  AL    src/main/java/org/apache/tika/metadata/TikaMimeKeys.java
+  AL    src/main/java/org/apache/tika/mime/Clause.java
+  AL    src/main/java/org/apache/tika/mime/HexCoDec.java
+  AL    src/main/java/org/apache/tika/mime/Magic.java
+  AL    src/main/java/org/apache/tika/mime/MagicClause.java
+  AL    src/main/java/org/apache/tika/mime/MagicMatch.java
+  AL    src/main/java/org/apache/tika/mime/MimeType.java
+  AL    src/main/java/org/apache/tika/mime/MimeTypeException.java
+  AL    src/main/java/org/apache/tika/mime/MimeTypes.java
+  AL    src/main/java/org/apache/tika/mime/MimeTypesFactory.java
+  AL    src/main/java/org/apache/tika/mime/MimeTypesReader.java
+  AL    src/main/java/org/apache/tika/mime/Operator.java
+  AL    src/main/java/org/apache/tika/mime/Patterns.java
+  AL    src/main/java/org/apache/tika/parser/AutoDetectParser.java
+  AL    src/main/java/org/apache/tika/parser/EmptyParser.java
+  AL    src/main/java/org/apache/tika/parser/ErrorParser.java
+  AL    src/main/java/org/apache/tika/parser/html/HtmlParser.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/ExcelParser.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/FilteredStringWriter.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/PowerPointExtractor.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/PowerPointParser.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/PPTConstants.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/Slide.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/TextBox.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/Word6CHPBinTable.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/Word6Extractor.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/WordParser.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/WordTextBuffer.java
+  AL    src/main/java/org/apache/tika/parser/microsoft/WordTextPiece.java
+  AL    src/main/java/org/apache/tika/parser/opendocument/OpenOfficeEntityResolver.java
+  AL    src/main/java/org/apache/tika/parser/opendocument/OpenOfficeParser.java
+  AL    src/main/java/org/apache/tika/parser/Parser.java
+  AL    src/main/java/org/apache/tika/parser/ParserDecorator.java
+  AL    src/main/java/org/apache/tika/parser/ParserPostProcessor.java
+  AL    src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
+  AL    src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+  AL    src/main/java/org/apache/tika/parser/rtf/RTFParser.java
+  AL    src/main/java/org/apache/tika/parser/txt/TXTParser.java
+  AL    src/main/java/org/apache/tika/parser/xml/XMLParser.java
+  AL    src/main/java/org/apache/tika/sax/ContentHandlerDecorator.java
+  AL    src/main/java/org/apache/tika/sax/TeeContentHandler.java
+  AL    src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
+  AL    src/main/java/org/apache/tika/sax/XHTMLContentHandler.java
+  AL    src/main/java/org/apache/tika/utils/ParseUtils.java
+  AL    src/main/java/org/apache/tika/utils/RegexUtils.java
+  AL    src/main/java/org/apache/tika/utils/RereadableInputStream.java
+  AL    src/main/java/org/apache/tika/utils/StringUtil.java
+  AL    src/main/java/org/apache/tika/utils/Utils.java
+  AL    src/main/resources/mime/tika-mimetypes.xml
+ !????? src/main/resources/tika-config.xml
+  AL    src/site/apt/index.apt
+  B     src/site/resources/tika.png
+  B     src/site/resources/tika.xcf
+ !????? src/site/SITE-README.txt
+  AL    src/site/site.xml
+  AL    src/test/java/org/apache/tika/metadata/TestMetadata.java
+  AL    src/test/java/org/apache/tika/metadata/TestSpellCheckedMetadata.java
+  AL    src/test/java/org/apache/tika/mime/MimeTypesTest.java
+  AL    src/test/java/org/apache/tika/mime/MimeTypeTest.java
+  AL    src/test/java/org/apache/tika/mime/TestMimeTypes.java
+  AL    src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
+  AL    src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
+  AL    src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+  AL    src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
+  AL    src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
+  AL    src/test/java/org/apache/tika/parser/txt/TXTParserTest.java
+  AL    src/test/java/org/apache/tika/TestParsers.java
+  AL    src/test/java/org/apache/tika/TestRereadableInputStream.java
+ !????? src/test/resources/log4j/log4j.properties
+  A     src/test/resources/test-documents/test-documents.zip
+  B     src/test/resources/test-documents/testEXCEL.xls
+ !????? src/test/resources/test-documents/testHTML.html
+ !????? src/test/resources/test-documents/testHTML_utf8.html
+  B     src/test/resources/test-documents/testOpenOffice2.odt
+  B     src/test/resources/test-documents/testPDF.pdf
+  B     src/test/resources/test-documents/testPPT.ppt
+ !????? src/test/resources/test-documents/testRTF.rtf
+ !????? src/test/resources/test-documents/testTXT.txt
+  B     src/test/resources/test-documents/testWORD.doc
+ !????? src/test/resources/test-documents/testXML.xml
+  AL    src.xml
+ !????? tika.log
+ 
+ *****************************************************
+ Printing headers for files without AL header...
+ 
+ 
+ =======================================================================
+ ==CHANGES.txt
+ =======================================================================
+ Tika Change Log
+
+Unreleased changes (0.1-dev)
+
+1. TIKA-5 - Port Metadata Framework from Nutch (mattmann)
+
+2. TIKA-11 - Consolidate test classes into a src/test/java directory tree (mattmann)
+
+3. TIKA-15 - Utils.print does not print a Content having no value (jukka)
+
+4. TIKA-19 - org.apache.tika.TestParsers fails (bdelacretaz)
+
+5. TIKA-16 - Issues with data files used for testing by TestParsers (bdelacretaz)
+
+6. TIKA-14 - MimeTypeUtils.getMimeType() returns the default mime type for 
+             .odt (Open Office) file (bdelacretaz)
+
+7. TIKA-12 - Add URL capability to MimeTypesUtils (jukka)
+
+8. TIKA-13 - Fix obsolete package names in config.xml (siren)
+
+9. TIKA-10 - Remove MimeInfoException catch clauses and import from TestParsers (siren)
+
+10. TIKA-8 - Replaced the jmimeinfo dependency with a trivial mime type detector (jukka)
+
+11. TIKA-7 - Added the Lius Lite code. Added missing dependencies to POM (jukka)
+
+12. TIKA-18 - &quot;Office&quot; interface should be renamed &quot;MSOffice&quot; (mattmann)
+
+13. TIKA-23 - Decouple Parser from ParserConfig (jukka)
+
+14. TIKA-6 - Port Nutch (or better) MimeType detection system into Tika (J. Charron &amp; mattmann)
+
+15. TIKA-25 - Removed hardcoded reference to C:\oo.xml in OpenOfficeParser (K. Bennett &amp; jukka)
+
+16. TIKA-17 - Need to support URL's for input resources. (K. Bennett &amp; mattmann)
+
+17. TIKA-22 - Remove @author tags from the java source (mattmann)
+
+18. TIKA-21 - Simplified configuration code (jukka)
+
+19. TIKA-17 - Rename all &quot;Lius&quot; classes to be &quot;Tika&quot; classes (jukka)
+
+20. TIKA-30 - Added utility constructors to TikaConfig (K. Bennett &amp; jukka)
+
+21. TIKA-28 - Rename config.xml to tika-config.xml or similar (mattmann)
+
+22. TIKA-26 - Use Map&lt;String, Content&gt; instead of List&lt;Content&gt; (jukka)
+
+23. TIKA-31 - protected Parser.parse(InputStream stream,
+
+ =======================================================================
+ ==src/main/java/org/apache/tika/metadata/package.html
+ =======================================================================
+ &lt;html&gt;
+&lt;body&gt;
+A Multi-valued Metadata container, and set
+of constant fields for Tika Metadata.
+&lt;/body&gt;
+&lt;/html&gt;
+
+ =======================================================================
+ ==src/main/resources/tika-config.xml
+ =======================================================================
+ &lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
+&lt;properties&gt;
+
+    &lt;mimeTypeRepository resource=&quot;/org/apache/tika/mime/tika-mimetypes.xml&quot; magic=&quot;false&quot;/&gt;
+
+    &lt;parsers&gt;
+
+        &lt;parser name=&quot;text-xml&quot; class=&quot;org.apache.tika.parser.xml.XMLParser&quot;&gt;
+                &lt;mime&gt;application/xml&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-msword&quot; class=&quot;org.apache.tika.parser.microsoft.WordParser&quot;&gt;
+                &lt;mime&gt;application/msword&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-msexcel&quot; class=&quot;org.apache.tika.parser.microsoft.ExcelParser&quot;&gt;
+                &lt;mime&gt;application/vnd.ms-excel&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-mspowerpoint&quot; class=&quot;org.apache.tika.parser.microsoft.PowerPointParser&quot;&gt;
+                &lt;mime&gt;application/vnd.ms-powerpoint&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-html&quot; class=&quot;org.apache.tika.parser.html.HtmlParser&quot;&gt;
+                &lt;mime&gt;text/html&lt;/mime&gt;
+                &lt;mime&gt;application/x-asp&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser mame=&quot;parse-rtf&quot; class=&quot;org.apache.tika.parser.rtf.RTFParser&quot;&gt;
+                &lt;mime&gt;application/rtf&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-pdf&quot; class=&quot;org.apache.tika.parser.pdf.PDFParser&quot;&gt;
+                &lt;mime&gt;application/pdf&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-txt&quot; class=&quot;org.apache.tika.parser.txt.TXTParser&quot;&gt;
+                &lt;mime&gt;text/plain&lt;/mime&gt;
+        &lt;/parser&gt;
+
+        &lt;parser name=&quot;parse-openoffice&quot; class=&quot;org.apache.tika.parser.opendocument.OpenOfficeParser&quot;&gt;            
+                &lt;mime&gt;application/vnd.sun.xml.writer&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.text&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.graphics&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.presentation&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.spreadsheet&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.chart&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.image&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.formula&lt;/mime&gt;
+                &lt;mime&gt;application/vnd.oasis.opendocument.text-master&lt;/mime&gt;
+
+ =======================================================================
+ ==src/site/SITE-README.txt
+ =======================================================================
+ Here's how to update the live Tika website:
+(http://incubator.apache.org/tika/)
+
+1) Edit the content found here
+
+2) Run &quot;mvn site&quot; to generate the website pages
+
+3) Check the new content at target/site/index.html
+
+4) Checkout https://svn.apache.org/repos/asf/incubator/tika/site
+	and update the changed pages there
+	
+5) Commit your changes, both here and in the tika/site module
+
+6) To activate the changes on the live website, login to 
+	people.apache.org and run svn up in /www/incubator.apache.org/tika
+	
+7) That directory is replicated to the live website every few hours, so
+	your changes can take some time to be live.
+	
+Easy and fun, isn't it? ;-)
+
+This will get better once Tika graduates from the incubator.	 		 
+
+
+ =======================================================================
+ ==src/test/resources/log4j/log4j.properties
+ =======================================================================
+ #info,debug, error,fatal ...
+log4j.rootLogger=info,stdout,R
+
+#console
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+# Pattern to output the caller's file name and line number.
+log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
+
+#file
+log4j.appender.R=org.apache.log4j.RollingFileAppender
+log4j.appender.R.File=tika.log
+
+log4j.appender.R.MaxFileSize=100KB
+# Keep one backup file
+log4j.appender.R.MaxBackupIndex=1
+
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n     
+
+
+ =======================================================================
+ ==src/test/resources/test-documents/testHTML.html
+ =======================================================================
+ &lt;html&gt;
+	&lt;head&gt;
+		&lt;title&gt;Title : Test Indexation Html&lt;/title&gt;	
+	&lt;/head&gt;
+	&lt;body&gt;
+		&lt;h1&gt;Test Indexation Html&lt;/h1&gt;
+		&lt;p&gt;Indexation du fichier&lt;/p&gt;
+	&lt;/body&gt;	
+&lt;/html&gt;
+
+ =======================================================================
+ ==src/test/resources/test-documents/testHTML_utf8.html
+ =======================================================================
+ &lt;html&gt;
+	&lt;head&gt;
+		&lt;title&gt;Title : Tilte with UTF-8 chars ???§??&lt;/title&gt;	
+	&lt;/head&gt;
+	&lt;body&gt;
+		&lt;h1&gt;Content with UTF-8 chars&lt;/h1&gt;
+		&lt;p&gt;???§??&lt;/p&gt;
+	&lt;/body&gt;	
+&lt;/html&gt;
+
+ =======================================================================
+ ==src/test/resources/test-documents/testRTF.rtf
+ =======================================================================
+ {\rtf1\ansi\ansicpg1252\uc1\deff0\stshfdbch0\stshfloch0\stshfhich0\stshfbi0\deflang1036\deflangfe1036{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f37\froman\fcharset238\fprq2 Times New Roman CE;}
+{\f38\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f40\froman\fcharset161\fprq2 Times New Roman Greek;}{\f41\froman\fcharset162\fprq2 Times New Roman Tur;}{\f42\froman\fcharset177\fprq2 Times New Roman (Hebrew);}
+{\f43\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f44\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f45\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;
+\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;
+\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1036\langfe1036\cgrid\langnp1036\langfenp1036 \snext0 Normal;}{\*\cs10 \additive \ssemihidden 
+Default Paragraph Font;}{\*\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv 
+\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\rsidtbl \rsid2954171\rsid10375891}
+{\*\generator Microsoft Word 11.0.6568;}{\info{\title Test d\'92indexation Word}{\author Bibliotheque}{\operator Bibliotheque}{\creatim\yr2006\mo5\dy18\hr12\min19}{\revtim\yr2006\mo5\dy18\hr12\min19}{\version2}{\edmins0}{\nofpages1}{\nofwords3}
+{\nofchars21}{\*\company Universite Laval}{\nofcharsws23}{\vern24579}}\paperw11906\paperh16838\margl1417\margr1417\margt1417\margb1417 
+\deftab708\widowctrl\ftnbj\aenddoc\hyphhotz425\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1417\dgvorigin1417\dghshow1\dgvshow1
+\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\allowfieldendsel\wrppunct\asianbrkrule\nojkernpunct\rsidroot2954171 \fet0
+\sectd \linex0\headery708\footery708\colsx708\endnhere\sectlinegrid360\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3
+\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}
+{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain 
+\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs24\lang1036\langfe1036\cgrid\langnp1036\langfenp1036 {\insrsid2954171 Test d\rquote indexation Word
+\par 
+\par }}
+
+ =======================================================================
+ ==src/test/resources/test-documents/testTXT.txt
+ =======================================================================
+ Test d'indexation de Txt
+http://www.apache.org
+
+ =======================================================================
+ ==src/test/resources/test-documents/testXML.xml
+ =======================================================================
+ &lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
+&lt;oaidc:dc xmlns:dc=&quot;http://purl.org/dc/elements/1.1/&quot; xmlns:oaidc=&quot;http://www.openarchives.org/OAI/2.0/oai_dc/&quot;&gt;
+
+	&lt;dc:title&gt;Archimède et Lius&lt;/dc:title&gt;
+
+	&lt;dc:creator&gt;Rida Benjelloun&lt;/dc:creator&gt;
+
+	&lt;dc:subject&gt;Java&lt;/dc:subject&gt;
+
+	&lt;dc:subject&gt;XML&lt;/dc:subject&gt;
+
+	&lt;dc:subject&gt;XSLT&lt;/dc:subject&gt;
+
+	&lt;dc:subject&gt;JDOM&lt;/dc:subject&gt;
+ 
+	&lt;dc:subject&gt;Indexation&lt;/dc:subject&gt;
+
+	&lt;dc:description&gt;Framework d'indexation des documents XML, HTML, PDF etc.. &lt;/dc:description&gt;
+
+	&lt;dc:identifier&gt;http://www.apache.org&lt;/dc:identifier&gt;
+
+	&lt;dc:date&gt;2000-12&lt;/dc:date&gt;
+
+	&lt;dc:type&gt;test&lt;/dc:type&gt;
+
+	&lt;dc:format&gt;application/msword&lt;/dc:format&gt;
+
+	&lt;dc:language&gt;Fr&lt;/dc:language&gt;
+
+	&lt;dc:rights&gt;Non restreint&lt;/dc:rights&gt;	
+
+&lt;/oaidc:dc&gt;
+
+ =======================================================================
+ ==tika.log
+ =======================================================================
+ INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.txt.TXTParser
+     INFO main root - Loading parser class = org.apache.tika.parser.txt.TXTParser
+     INFO main root - Loading parser class = org.apache.tika.parser.rtf.RTFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.rtf.RTFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.xml.XMLParser
+     INFO main root - Loading parser class = org.apache.tika.parser.xml.XMLParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+     INFO main root - Loading parser class = org.apache.tika.parser.opendocument.OpenOfficeParser
+     INFO main root - Loading parser class = org.apache.tika.parser.opendocument.OpenOfficeParser
+     INFO main root - Loading parser class = org.apache.tika.parser.html.HtmlParser
+     INFO main root - Loading parser class = org.apache.tika.parser.html.HtmlParser
+     INFO main root - Loading parser class = org.apache.tika.parser.html.HtmlParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+     INFO main root - Loading parser class = org.apache.tika.parser.html.HtmlParser
+     INFO main root - Loading parser class = org.apache.tika.parser.opendocument.OpenOfficeParser
+     INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.rtf.RTFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.txt.TXTParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.xml.XMLParser
+     INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.pdf.PDFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.txt.TXTParser
+     INFO main root - Loading parser class = org.apache.tika.parser.txt.TXTParser
+     INFO main root - Loading parser class = org.apache.tika.parser.rtf.RTFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.rtf.RTFParser
+     INFO main root - Loading parser class = org.apache.tika.parser.xml.XMLParser
+     INFO main root - Loading parser class = org.apache.tika.parser.xml.XMLParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.mspowerpoint.MsPowerPointParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msword.MsWordParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+     INFO main root - Loading parser class = org.apache.tika.parser.msexcel.MsExcelParser
+</pre></div></p>
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2007
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+  
+  
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Modified: incubator/tika/site/source-repository.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/source-repository.html?rev=596146&r1=596145&r2=596146&view=diff
==============================================================================
--- incubator/tika/site/source-repository.html (original)
+++ incubator/tika/site/source-repository.html Sun Nov 18 14:20:54 2007
@@ -135,6 +135,26 @@
         </li>
               </ul>
         </li>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+              <a href="project-reports.html">Project Reports</a>
+              </li>
           </ul>
                                        <a href="http://maven.apache.org/" title="Built by Maven" id="poweredBy">
             <img alt="Built by Maven" src="./images/logos/maven-feather.png"></img>
@@ -163,7 +183,7 @@
       <div class="xright">&#169;  
           2007
     
-          Apache Software Foundation
+          The Apache Software Foundation
           
   
 

Added: incubator/tika/site/surefire-report.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/surefire-report.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/surefire-report.html (added)
+++ incubator/tika/site/surefire-report.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,207 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+<html>
+  <head>
+    <title>Apache Tika - </title>
+    <style type="text/css" media="all">
+      @import url("./css/maven-base.css");
+      @import url("./css/maven-theme.css");
+      @import url("./css/site.css");
+    </style>
+    <link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
+        <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
+      </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="tika.png" alt="Apache Tika" />
+    
+            </a>
+                        <a href="../" id="bannerRight">
+    
+                                    <img src="http://incubator.apache.org/images/apache-incubator-logo.png" alt="Apache Incubator" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+  
+  
+              <div class="xright">      <a href="http://www.apache.org/">Apache</a>
+          |
+          <a href="../">Incubator</a>
+          |
+          <a href="http://lucene.apache.org/">Lucene</a>
+          
+  
+
+  
+    
+  
+  
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+  
+  
+                   <h5>Apache Tika</h5>
+        <ul>
+              
+    <li class="none">
+              <a href="index.html">Introduction</a>
+        </li>
+          </ul>
+          <h5>Project Documentation</h5>
+        <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+              <a href="project-info.html">Project Information</a>
+              </li>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+            
+            
+      
+            
+      
+            
+      
+              
+            <li class="expanded">
+              <a href="project-reports.html">Project Reports</a>
+                <ul>
+                  
+    <li class="none">
+              <a href="checkstyle.html">Checkstyle</a>
+        </li>
+                  
+    <li class="none">
+              <a href="findbugs.html">FindBugs Report</a>
+        </li>
+                  
+    <li class="none">
+              <a href="apidocs/index.html">JavaDocs</a>
+        </li>
+                  
+    <li class="none">
+              <strong>Maven Surefire Report</strong>
+        </li>
+                  
+    <li class="none">
+              <a href="rat-report.html">RAT Report</a>
+        </li>
+                  
+    <li class="none">
+              <a href="xref/index.html">Source Xref</a>
+        </li>
+                  
+    <li class="none">
+              <a href="xref-test/index.html">Test Source Xref</a>
+        </li>
+              </ul>
+        </li>
+          </ul>
+                                       <a href="http://maven.apache.org/" title="Built by Maven" id="poweredBy">
+            <img alt="Built by Maven" src="./images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+  
+  
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <script type="text/javascript">
+function toggleDisplay(elementId) { // Toggle the element whose id is elementId + 'error' and swap the matching elementId + 'on' / 'off' elements.
+ var elm = document.getElementById(elementId + 'error');
+ if (elm && typeof elm.style != "undefined") { // do nothing when the element is missing or exposes no style object
+ if (elm.style.display == "none") {
+ elm.style.display = ""; // clear the inline display value so the element is no longer forced hidden
+ document.getElementById(elementId + 'off').style.display = "none";
+ document.getElementById(elementId + 'on').style.display = "inline";
+ } else if (elm.style.display == "") { elm.style.display = "none"; // only an empty inline display is toggled to hidden; any other value is left untouched
+ document.getElementById(elementId + 'off').style.display = "inline";
+ document.getElementById(elementId + 'on').style.display = "none";
+ } 
+ } 
+ }
+</script><h2><a name="Summary"></a>Summary</h2><div class="section">[<a href="#Summary">Summary</a>][<a href="#Package_List">Package List</a>][<a href="#Test_Cases">Test Cases</a>]</div><table class="bodyTable"><tr class="a"></tr><tr class="b"></tr></table><table class="bodyTable"><tr class="a"><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td>59</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>6.364</td></tr></table><br />Note: failures are anticipated and checked for with assertions while errors are unanticipated.<table class="bodyTable"><tr class="a"></tr><tr class="b"></tr></table><h2><a name="Package_List"></a>Package List</h2><div class="section">[<a href="#Summary">Summary</a>][<a href="#Package_List">Package List</a>][<a href="#Test_Cases">Test Cases</a>]</div><table class="bodyTable"><tr class="a"></tr><tr class="b"></tr></table><table class="bodyTable"><tr class="a"><th>Package</th><th>Tests</t
 h><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td><a href="#org.apache.tika.parser.txt">org.apache.tika.parser.txt</a></td><td>3</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.006</td></tr><tr class="a"><td><a href="#org.apache.tika.parser.html">org.apache.tika.parser.html</a></td><td>2</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.087</td></tr><tr class="b"><td><a href="#org.apache.tika.mime">org.apache.tika.mime</a></td><td>11</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.11</td></tr><tr class="a"><td><a href="#org.apache.tika">org.apache.tika</a></td><td>13</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>1.967</td></tr><tr class="b"><td><a href="#org.apache.tika.parser">org.apache.tika.parser</a></td><td>9</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.86</td></tr><tr class="a"><td><a href="#org.apache.tika.metadata">org.apache.tika.metadata</a></td><td>18</td><td>0</td><td>0</td><td>0<
 /td><td>100%</td><td>3.271</td></tr><tr class="b"><td><a href="#org.apache.tika.parser.microsoft">org.apache.tika.parser.microsoft</a></td><td>3</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.063</td></tr></table><br />Note: package statistics are not computed recursively, they only sum up all of its testsuites numbers.<h3><a name="org.apache.tika.parser.txt"></a>org.apache.tika.parser.txt</h3><table class="bodyTable"><tr class="a"><th></th><th>Class</th><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td><a href="#org.apache.tika.parser.txtTXTParserTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.parser.txtTXTParserTest">TXTParserTest</a></td><td>3</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.006</td></tr></table><h3><a name="org.apache.tika.parser.html"></a>org.apache.tika.parser.html</h3><table class="bodyTable"><tr class="a"><th></th><th>Class</th><th>Te
 sts</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td><a href="#org.apache.tika.parser.htmlHtmlParserTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.parser.htmlHtmlParserTest">HtmlParserTest</a></td><td>2</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.087</td></tr></table><h3><a name="org.apache.tika.mime"></a>org.apache.tika.mime</h3><table class="bodyTable"><tr class="a"><th></th><th>Class</th><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td><a href="#org.apache.tika.mimeMimeTypesTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.mimeMimeTypesTest">MimeTypesTest</a></td><td>6</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.005</td></tr><tr class="a"><td><a href="#org.apache.tika.mimeMimeTypeTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#or
 g.apache.tika.mimeMimeTypeTest">MimeTypeTest</a></td><td>1</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.024</td></tr><tr class="b"><td><a href="#org.apache.tika.mimeTestMimeTypes"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.mimeTestMimeTypes">TestMimeTypes</a></td><td>4</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.081</td></tr></table><h3><a name="org.apache.tika"></a>org.apache.tika</h3><table class="bodyTable"><tr class="a"><th></th><th>Class</th><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td><a href="#org.apache.tikaTestParsers"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tikaTestParsers">TestParsers</a></td><td>10</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>1.95</td></tr><tr class="a"><td><a href="#org.apache.tikaTestRereadableInputStream"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apac
 he.tikaTestRereadableInputStream">TestRereadableInputStream</a></td><td>3</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.017</td></tr></table><h3><a name="org.apache.tika.parser"></a>org.apache.tika.parser</h3><table class="bodyTable"><tr class="b"><th></th><th>Class</th><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="a"><td><a href="#org.apache.tika.parserAutoDetectParserTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.parserAutoDetectParserTest">AutoDetectParserTest</a></td><td>9</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.86</td></tr></table><h3><a name="org.apache.tika.metadata"></a>org.apache.tika.metadata</h3><table class="bodyTable"><tr class="b"><th></th><th>Class</th><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="a"><td><a href="#org.apache.tika.metadataTestMetadata"><img src="images/ic
 on_success_sml.gif" /></a></td><td><a href="#org.apache.tika.metadataTestMetadata">TestMetadata</a></td><td>8</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.005</td></tr><tr class="b"><td><a href="#org.apache.tika.metadataTestSpellCheckedMetadata"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.metadataTestSpellCheckedMetadata">TestSpellCheckedMetadata</a></td><td>10</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>3.266</td></tr></table><h3><a name="org.apache.tika.parser.microsoft"></a>org.apache.tika.parser.microsoft</h3><table class="bodyTable"><tr class="a"><th></th><th>Class</th><th>Tests</th><th>Errors </th><th>Failures</th><th>Skipped</th><th>Success Rate</th><th>Time</th></tr><tr class="b"><td><a href="#org.apache.tika.parser.microsoftExcelParserTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.parser.microsoftExcelParserTest">ExcelParserTest</a></td><td>1</td><td>0</td><td>0</td><td>0</td><
 td>100%</td><td>0.026</td></tr><tr class="a"><td><a href="#org.apache.tika.parser.microsoftPowerPointParserTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.parser.microsoftPowerPointParserTest">PowerPointParserTest</a></td><td>1</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.035</td></tr><tr class="b"><td><a href="#org.apache.tika.parser.microsoftWordParserTest"><img src="images/icon_success_sml.gif" /></a></td><td><a href="#org.apache.tika.parser.microsoftWordParserTest">WordParserTest</a></td><td>1</td><td>0</td><td>0</td><td>0</td><td>100%</td><td>0.002</td></tr></table><table class="bodyTable"><tr class="a"></tr><tr class="b"></tr></table><h2><a name="Test_Cases"></a>Test Cases</h2><div class="section">[<a href="#Summary">Summary</a>][<a href="#Package_List">Package List</a>][<a href="#Test_Cases">Test Cases</a>]</div><h3><a name="org.apache.tika.metadataTestMetadata"></a>TestMetadata</h3><table class="bodyTable"><tr class="a"><
 td><img src="images/icon_success_sml.gif" /></td><td>testSet</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testObject</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testAdd</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testSetProperties</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testGet</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testIsMultiValued</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testNames</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testRemove</td><td>0</td></tr></table><h3><a name="org.apache.tika.metadataTestSpellCheckedMetadata"></a>TestSpellCheckedMetadata</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testSet</td><td>0.0
 09</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testObject</td><td>0.002</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testAdd</td><td>0.001</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testSetProperties</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testGet</td><td>0.001</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testIsMultiValued</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testNames</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testRemove</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testGetNormalizedName</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testHandlingSpeed</td><td>3.221</td></tr></table><h3><a name="org.apache.tika.mimeMimeTypesTest
 "></a>MimeTypesTest</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testForName</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testAddAlias</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testSuperType</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testSubTypes</td><td>0.001</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testIsDescendantOf</td><td>0.001</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testCompareTo</td><td>0</td></tr></table><h3><a name="org.apache.tika.mimeMimeTypeTest"></a>MimeTypeTest</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testIsValidName</td><td>0.003</td></tr></table><h3><a name="org.apache.tika.mimeTestMimeTypes"></a>TestMimeTypes</h3><table class="bodyTable"><tr class="b">
 <td><img src="images/icon_success_sml.gif" /></td><td>testCaseSensitivity</td><td>0.001</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testLoadMimeTypes</td><td>0</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testGuessMimeTypes</td><td>0</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testMimeDeterminationForTestDocuments</td><td>0.019</td></tr></table><h3><a name="org.apache.tika.parserAutoDetectParserTest"></a>AutoDetectParserTest</h3><table class="bodyTable"><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testExcel</td><td>0.099</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testHTML</td><td>0.053</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testOpenOffice</td><td>0.16</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testPDF</td><td>0.199</td></tr><tr class="b"><td><img src="
 images/icon_success_sml.gif" /></td><td>testPowerpoint</td><td>0.057</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testRTF</td><td>0.118</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testText</td><td>0.045</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testWord</td><td>0.065</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testXML</td><td>0.061</td></tr></table><h3><a name="org.apache.tika.parser.htmlHtmlParserTest"></a>HtmlParserTest</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testParseAscii</td><td>0.075</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testParseEmpty</td><td>0.01</td></tr></table><h3><a name="org.apache.tika.parser.microsoftExcelParserTest"></a>ExcelParserTest</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>tes
 tExcelParser</td><td>0.025</td></tr></table><h3><a name="org.apache.tika.parser.microsoftPowerPointParserTest"></a>PowerPointParserTest</h3><table class="bodyTable"><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testPowerPointParser</td><td>0.034</td></tr></table><h3><a name="org.apache.tika.parser.microsoftWordParserTest"></a>WordParserTest</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testWordParser</td><td>0.001</td></tr></table><h3><a name="org.apache.tika.parser.txtTXTParserTest"></a>TXTParserTest</h3><table class="bodyTable"><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testEnglishText</td><td>0.001</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testUTF8Text</td><td>0.001</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testEmptyText</td><td>0</td></tr></table><h3><a name="org.apache.tikaTestParsers"></a>TestParsers</h3
 ><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testPDFExtraction</td><td>0.687</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testTXTExtraction</td><td>0.027</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testRTFExtraction</td><td>0.314</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testXMLExtraction</td><td>0.04</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testPPTExtraction</td><td>0.098</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testWORDxtraction</td><td>0.035</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testEXCELExtraction</td><td>0.208</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testOOExtraction</td><td>0.058</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testHTMLExtraction</td>
 <td>0.023</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testZipExtraction</td><td>0.454</td></tr></table><h3><a name="org.apache.tikaTestRereadableInputStream"></a>TestRereadableInputStream</h3><table class="bodyTable"><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>testRewind</td><td>0.008</td></tr><tr class="b"><td><img src="images/icon_success_sml.gif" /></td><td>testCloseBehavior</td><td>0.001</td></tr><tr class="a"><td><img src="images/icon_success_sml.gif" /></td><td>test</td><td>0.003</td></tr></table><table class="bodyTable"><tr class="b"></tr><tr class="a"></tr></table><table class="bodyTable"><tr class="b"></tr><tr class="a"></tr></table>
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2007
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+  
+  
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Modified: incubator/tika/site/team-list.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/team-list.html?rev=596146&r1=596145&r2=596146&view=diff
==============================================================================
--- incubator/tika/site/team-list.html (original)
+++ incubator/tika/site/team-list.html Sun Nov 18 14:20:54 2007
@@ -135,6 +135,26 @@
         </li>
               </ul>
         </li>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+              <a href="project-reports.html">Project Reports</a>
+              </li>
           </ul>
                                        <a href="http://maven.apache.org/" title="Built by Maven" id="poweredBy">
             <img alt="Built by Maven" src="./images/logos/maven-feather.png"></img>
@@ -150,50 +170,7 @@
     </div>
     <div id="bodyColumn">
       <div id="contentBox">
-        <div class="section"><h2>The Team</h2><p>A successful project requires many people to play many roles. Some members write code or documentation, while others are valuable as testers, submitting patches and suggestions.</p><p>The team is comprised of Members and Contributors. Members have direct access to the source of a project and actively evolve the code-base. Contributors improve the project through submission of patches and suggestions to the Members. The number of Contributors to the project is unbounded. Get involved today. All contributions to the project are greatly appreciated.</p><div class="section"><h3>Members</h3><p>The following is a list of developers with commit privileges that have directly contributed to the project in one way or another.</p><table class="bodyTable"><tr class="a"><th>Id</th><th>Name</th><th>Email</th><th>URL</th><th>Organization</th><th>Organization URL</th><th>Roles</th><th>Time Zone</th><th>Actual Time (GMT)</th><th>Properties</th
 ></tr>
-            
-<tr class="a"><td>-</td><td>Rida Benjelloun</td><td>-</td><td>-</td><td>-</td><td>-</td>
-    <td>committer</td><td>-</td><td><span id="developer-0">-</span></td><td>-</td>
-</tr>
-
-<tr class="b"><td>kbennett</td><td>Keith R. Bennett</td><td>-</td><td>-</td>
-    <td>-</td><td>-</td><td>committer</td><td>-5</td>
-    <td><span id="developer-7">-</span></td><td>-</td>
-</tr>
-
-<tr class="a"><td>cutting</td><td>Doug Cutting</td><td>-</td><td>-</td><td>-</td>
-    <td>-</td><td>mentor</td><td>-</td><td><span id="developer-1">-</span></td><td>-</td>
-</tr>
-
-<tr class="b"><td>bdelacretaz</td><td>Bertrand Delacretaz</td>
-    <td>-</td><td>-</td><td>-</td><td>-</td><td>mentor</td><td>-</td><td>
-    <span id="developer-2">-</span></td><td>-</td>
-</tr>
-
-<tr class="a"><td>mharwood</td><td>Mark Harwood</td><td>-</td><td>-</td>
-    <td>-</td><td>-</td><td>committer</td><td>-</td>
-    <td><span id="developer-3">-</span></td><td>-</td>
-</tr>
-                
-<tr class="b"><td>mattmann</td><td>Chris A. Mattmann</td><td>
-    <a href="mailto:mattmann@apache.org">mattmann@apache.org</a></td>
-    <td><a  href="http://people.apache.org/~mattmann/">
-    http://people.apache.org/~mattmann/</a></td>
-    <td>NASA Jet Propulsion Laboratory</td>
-    <td><a href="http://www.jpl.nasa.gov">
-    http://www.jpl.nasa.gov</a></td>
-   <td>committer</td><td>-8</td><td><span id="developer-4">-8</span></td><td>-</td>
-</tr>
-                
-<tr class="a"><td>siren</td><td>Sami Siren</td><td>-</td><td>-</td><td>-</td><td>-</td>
-    <td>committer</td><td>-</td><td><span id="developer-5">-</span></td><td>-</td>
-</tr>
-                
-<tr class="b"><td>jukka</td><td>Jukka Zitting</td><td>-</td><td>-</td><td>-</td><td>-</td>
-    <td>mentor, committer</td><td>-</td><td><span id="developer-6">-</span></td><td>-</td>
-</tr>
-                
-                </table></div><div class="section"><h3>Contributors</h3><p>There are no contributors listed for this project. Please check back again later.</p></div></div><script type="text/javascript">
+        <div class="section"><h2>The Team</h2><p>A successful project requires many people to play many roles. Some members write code or documentation, while others are valuable as testers, submitting patches and suggestions.</p><p>The team is comprised of Members and Contributors. Members have direct access to the source of a project and actively evolve the code-base. Contributors improve the project through submission of patches and suggestions to the Members. The number of Contributors to the project is unbounded. Get involved today. All contributions to the project are greatly appreciated.</p><div class="section"><h3>Members</h3><p>The following is a list of developers with commit privileges that have directly contributed to the project in one way or another.</p><table class="bodyTable"><tr class="a"><th>Id</th><th>Name</th><th>Email</th><th>URL</th><th>Organization</th><th>Organization URL</th><th>Roles</th><th>Time Zone</th><th>Actual Time (GMT)</th><th>Properties</th
 ></tr><tr class="b"><td>ridabenjelloun</td><td>Rida Benjelloun</td><td><a href="mailto:ridabenjelloun@apache.org">ridabenjelloun@apache.org</a></td><td>-</td><td>-</td><td>-</td><td>committer</td><td>-</td><td><span id="developer-0">-</span></td><td>-</td></tr><tr class="a"><td>kbennett</td><td>Keith Bennett</td><td>-</td><td>-</td><td>-</td><td>-</td><td>committer</td><td>-</td><td><span id="developer-1">-</span></td><td>-</td></tr><tr class="b"><td>cutting</td><td>Doug Cutting</td><td>-</td><td>-</td><td>-</td><td>-</td><td>mentor</td><td>-</td><td><span id="developer-2">-</span></td><td>-</td></tr><tr class="a"><td>bdelacretaz</td><td>Bertrand Delacretaz</td><td>-</td><td>-</td><td>-</td><td>-</td><td>mentor</td><td>-</td><td><span id="developer-3">-</span></td><td>-</td></tr><tr class="b"><td>mharwood</td><td>Mark Harwood</td><td>-</td><td>-</td><td>-</td><td>-</td><td>committer</td><td>-</td><td><span id="developer-4">-</span></td><td>-</td></tr><tr class="a"><td>mattma
 nn</td><td>Chris A. Mattmann</td><td><a href="mailto:mattmann@apache.org">mattmann@apache.org</a></td><td><a href="http://people.apache.org/~mattmann/">http://people.apache.org/~mattmann/</a></td><td>NASA Jet Propulsion Laboratory</td><td><a href="http://www.jpl.nasa.gov">http://www.jpl.nasa.gov</a></td><td>committer</td><td>-8</td><td><span id="developer-5">-8</span></td><td>-</td></tr><tr class="b"><td>siren</td><td>Sami Siren</td><td>-</td><td>-</td><td>-</td><td>-</td><td>committer</td><td>-</td><td><span id="developer-6">-</span></td><td>-</td></tr><tr class="a"><td>jukka</td><td>Jukka Zitting</td><td>-</td><td>-</td><td>-</td><td>-</td><td>mentor, committer</td><td>-</td><td><span id="developer-7">-</span></td><td>-</td></tr></table></div><div class="section"><h3>Contributors</h3><p>There are no contributors listed for this project. Please check back again later.</p></div></div><script type="text/javascript">
 function offsetDate(id, offset) {
     var now = new Date();
     var nowTime = now.getTime();
@@ -205,7 +182,7 @@
 }
 
 function init(){
-    offsetDate('developer-4', '-8');
+    offsetDate('developer-5', '-8');
 }
 
 window.onLoad = init();
@@ -219,7 +196,7 @@
       <div class="xright">&#169;  
           2007
     
-          Apache Software Foundation
+          The Apache Software Foundation
           
   
 

Added: incubator/tika/site/xref-test/allclasses-frame.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref-test/allclasses-frame.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref-test/allclasses-frame.html (added)
+++ incubator/tika/site/xref-test/allclasses-frame.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,62 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en">
+	<head>
+		<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+		<title>All Classes</title>
+		<link rel="stylesheet" type="text/css" href="stylesheet.css" title="style" />
+    </head>
+    <body>
+
+		<h3>All Classes</h3>
+
+		<ul>
+						<li>
+				<a href="org/apache/tika/parser/AutoDetectParserTest.html" target="classFrame">AutoDetectParserTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/parser/microsoft/ExcelParserTest.html" target="classFrame">ExcelParserTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/parser/html/HtmlParserTest.html" target="classFrame">HtmlParserTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/mime/MimeTypeTest.html" target="classFrame">MimeTypeTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/mime/MimeTypesTest.html" target="classFrame">MimeTypesTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/parser/microsoft/PowerPointParserTest.html" target="classFrame">PowerPointParserTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/parser/txt/TXTParserTest.html" target="classFrame">TXTParserTest</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/TestRereadableInputStream.html" target="classFrame">TestInputStream</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/metadata/TestMetadata.html" target="classFrame">TestMetadata</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/mime/TestMimeTypes.html" target="classFrame">TestMimeTypes</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/parser/AutoDetectParserTest.html" target="classFrame">TestParams</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/TestParsers.html" target="classFrame">TestParsers</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/TestRereadableInputStream.html" target="classFrame">TestRereadableInputStream</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/metadata/TestSpellCheckedMetadata.html" target="classFrame">TestSpellCheckedMetadata</a>
+			</li>
+						<li>
+				<a href="org/apache/tika/parser/microsoft/WordParserTest.html" target="classFrame">WordParserTest</a>
+			</li>
+			      </ul>
+
+    </body>
+  </html>

Added: incubator/tika/site/xref-test/index.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref-test/index.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref-test/index.html (added)
+++ incubator/tika/site/xref-test/index.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,24 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN" "DTD/xhtml1-frameset.dtd">
+<html xml:lang="en" lang="en">
+	<head>
+		<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+		<title>Apache Tika 0.1-SNAPSHOT Reference</title>
+	</head>
+    <frameset cols="20%,80%">
+		<frameset rows="30%,70%">
+			<frame src="overview-frame.html" name="packageListFrame" />
+			<frame src="allclasses-frame.html" name="packageFrame" />
+		</frameset>
+		<frame src="overview-summary.html" name="classFrame" />
+        <noframes>
+            <body>
+                <h1>Frame Alert</h1>
+                <p>
+                    You don't have frames. Go <a href="overview-summary.html">here</a>
+                </p>
+            </body>
+        </noframes>
+    </frameset>
+</html>
+

Added: incubator/tika/site/xref-test/org/apache/tika/TestParsers.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref-test/org/apache/tika/TestParsers.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref-test/org/apache/tika/TestParsers.html (added)
+++ incubator/tika/site/xref-test/org/apache/tika/TestParsers.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,216 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>TestParsers xref</title>
+<link type="text/css" rel="stylesheet" href="../../../stylesheet.css" />
+</head>
+<body>
+<pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a>   <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_javadoccomment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_javadoccomment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_javadoccomment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.File;
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.io.FileInputStream;
+<a name="21" href="#21">21</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> java.util.List;
+<a name="24" href="#24">24</a>  
+<a name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> junit.framework.TestCase;
+<a name="26" href="#26">26</a>  
+<a name="27" href="#27">27</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.config.TikaConfig;
+<a name="28" href="#28">28</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="29" href="#29">29</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.parser.Parser;
+<a name="30" href="#30">30</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.utils.ParseUtils;
+<a name="31" href="#31">31</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.utils.Utils;
+<a name="32" href="#32">32</a>  <strong class="jxr_keyword">import</strong> org.jdom.JDOMException;
+<a name="33" href="#33">33</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.helpers.DefaultHandler;
+<a name="34" href="#34">34</a>  
+<a name="35" href="#35">35</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="36" href="#36">36</a>  <em class="jxr_javadoccomment"> * Junit test class for Tika {@link Parser}s.</em>
+<a name="37" href="#37">37</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="38" href="#38">38</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../org/apache/tika/TestParsers.html">TestParsers</a> <strong class="jxr_keyword">extends</strong> TestCase {
+<a name="39" href="#39">39</a>  
+<a name="40" href="#40">40</a>      <strong class="jxr_keyword">private</strong> TikaConfig tc;
+<a name="41" href="#41">41</a>  
+<a name="42" href="#42">42</a>      <strong class="jxr_keyword">private</strong> File testFilesBaseDir;
+<a name="43" href="#43">43</a>  
+<a name="44" href="#44">44</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> setUp() <strong class="jxr_keyword">throws</strong> JDOMException, IOException {
+<a name="45" href="#45">45</a>          <em class="jxr_comment">/*</em>
+<a name="46" href="#46">46</a>  <em class="jxr_comment">         * FIXME the old mechanism does not work anymore when running the tests</em>
+<a name="47" href="#47">47</a>  <em class="jxr_comment">         * with Maven - need a resource-based one, but this means more changes</em>
+<a name="48" href="#48">48</a>  <em class="jxr_comment">         * to classes which rely on filenames.</em>
+<a name="49" href="#49">49</a>  <em class="jxr_comment">         * </em>
+<a name="50" href="#50">50</a>  <em class="jxr_comment">         * String sep = File.separator; StringTokenizer st = new</em>
+<a name="51" href="#51">51</a>  <em class="jxr_comment">         * StringTokenizer(System.getProperty( "java.class.path"),</em>
+<a name="52" href="#52">52</a>  <em class="jxr_comment">         * File.pathSeparator);</em>
+<a name="53" href="#53">53</a>  <em class="jxr_comment">         * </em>
+<a name="54" href="#54">54</a>  <em class="jxr_comment">         * classDir = new File(st.nextToken());</em>
+<a name="55" href="#55">55</a>  <em class="jxr_comment">         * </em>
+<a name="56" href="#56">56</a>  <em class="jxr_comment">         * config = classDir.getParent() + sep + "config" + sep + "config.xml";</em>
+<a name="57" href="#57">57</a>  <em class="jxr_comment">         * </em>
+<a name="58" href="#58">58</a>  <em class="jxr_comment">         * String log4j = classDir.getParent() + sep + "Config" + sep + "log4j" +</em>
+<a name="59" href="#59">59</a>  <em class="jxr_comment">         * sep + "log4j.properties";</em>
+<a name="60" href="#60">60</a>  <em class="jxr_comment">         */</em>
+<a name="61" href="#61">61</a>  
+<a name="62" href="#62">62</a>          testFilesBaseDir = <strong class="jxr_keyword">new</strong> File(<span class="jxr_string">"src/test/resources/test-documents"</span>);
+<a name="63" href="#63">63</a>  
+<a name="64" href="#64">64</a>          tc = TikaConfig.getDefaultConfig();
+<a name="65" href="#65">65</a>      }
+<a name="66" href="#66">66</a>  
+<a name="67" href="#67">67</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testPDFExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="68" href="#68">68</a>          File file = getTestFile(<span class="jxr_string">"testPDF.pdf"</span>);
+<a name="69" href="#69">69</a>          String s1 = ParseUtils.getStringContent(file, tc);
+<a name="70" href="#70">70</a>          String s2 = ParseUtils.getStringContent(file, tc, <span class="jxr_string">"application/pdf"</span>);
+<a name="71" href="#71">71</a>          String s3 = ParseUtils.getStringContent(file, TikaConfig
+<a name="72" href="#72">72</a>                  .getDefaultConfig());
+<a name="73" href="#73">73</a>          assertEquals(s1, s2);
+<a name="74" href="#74">74</a>          assertEquals(s1, s3);
+<a name="75" href="#75">75</a>      }
+<a name="76" href="#76">76</a>  
+<a name="77" href="#77">77</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testTXTExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="78" href="#78">78</a>          File file = getTestFile(<span class="jxr_string">"testTXT.txt"</span>);
+<a name="79" href="#79">79</a>          String s1 = ParseUtils.getStringContent(file, tc);
+<a name="80" href="#80">80</a>          String s2 = ParseUtils.getStringContent(file, tc, <span class="jxr_string">"text/plain"</span>);
+<a name="81" href="#81">81</a>          assertEquals(s1, s2);
+<a name="82" href="#82">82</a>      }
+<a name="83" href="#83">83</a>  
+<a name="84" href="#84">84</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testRTFExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="85" href="#85">85</a>          File file = getTestFile(<span class="jxr_string">"testRTF.rtf"</span>);
+<a name="86" href="#86">86</a>          String s1 = ParseUtils.getStringContent(file, tc);
+<a name="87" href="#87">87</a>          String s2 = ParseUtils.getStringContent(file, tc, <span class="jxr_string">"application/rtf"</span>);
+<a name="88" href="#88">88</a>          assertEquals(s1, s2);
+<a name="89" href="#89">89</a>      }
+<a name="90" href="#90">90</a>  
+<a name="91" href="#91">91</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testXMLExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="92" href="#92">92</a>          File file = getTestFile(<span class="jxr_string">"testXML.xml"</span>);
+<a name="93" href="#93">93</a>          String s1 = ParseUtils.getStringContent(file, tc);
+<a name="94" href="#94">94</a>          String s2 = ParseUtils.getStringContent(file, tc, <span class="jxr_string">"application/xml"</span>);
+<a name="95" href="#95">95</a>          assertEquals(s1, s2);
+<a name="96" href="#96">96</a>      }
+<a name="97" href="#97">97</a>  
+<a name="98" href="#98">98</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testPPTExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="99" href="#99">99</a>          File file = getTestFile(<span class="jxr_string">"testPPT.ppt"</span>);
+<a name="100" href="#100">100</a>         String s1 = ParseUtils.getStringContent(file, tc);
+<a name="101" href="#101">101</a>         String s2 = ParseUtils.getStringContent(
+<a name="102" href="#102">102</a>                 file, tc, <span class="jxr_string">"application/vnd.ms-powerpoint"</span>);
+<a name="103" href="#103">103</a>         assertEquals(s1, s2);
+<a name="104" href="#104">104</a>         Parser parser = tc.getParser(<span class="jxr_string">"application/vnd.ms-powerpoint"</span>);
+<a name="105" href="#105">105</a>         Metadata metadata = <strong class="jxr_keyword">new</strong> Metadata();
+<a name="106" href="#106">106</a>         InputStream stream = <strong class="jxr_keyword">new</strong> FileInputStream(file);
+<a name="107" href="#107">107</a>         <strong class="jxr_keyword">try</strong> {
+<a name="108" href="#108">108</a>             parser.parse(stream, <strong class="jxr_keyword">new</strong> DefaultHandler(), metadata);
+<a name="109" href="#109">109</a>         } <strong class="jxr_keyword">finally</strong> {
+<a name="110" href="#110">110</a>             stream.close();
+<a name="111" href="#111">111</a>         }
+<a name="112" href="#112">112</a>         assertEquals(<span class="jxr_string">"Sample Powerpoint Slide"</span>, metadata.get(Metadata.TITLE));
+<a name="113" href="#113">113</a>     }
+<a name="114" href="#114">114</a> 
+<a name="115" href="#115">115</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testWORDxtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="116" href="#116">116</a>         File file = getTestFile(<span class="jxr_string">"testWORD.doc"</span>);
+<a name="117" href="#117">117</a>         String s1 = ParseUtils.getStringContent(file, tc);
+<a name="118" href="#118">118</a>         String s2 = ParseUtils.getStringContent(file, tc, <span class="jxr_string">"application/msword"</span>);
+<a name="119" href="#119">119</a>         assertEquals(s1, s2);
+<a name="120" href="#120">120</a>         Parser parser = tc.getParser(<span class="jxr_string">"application/msword"</span>);
+<a name="121" href="#121">121</a>         Metadata metadata = <strong class="jxr_keyword">new</strong> Metadata();
+<a name="122" href="#122">122</a>         InputStream stream = <strong class="jxr_keyword">new</strong> FileInputStream(file);
+<a name="123" href="#123">123</a>         <strong class="jxr_keyword">try</strong> {
+<a name="124" href="#124">124</a>             parser.parse(stream, <strong class="jxr_keyword">new</strong> DefaultHandler(), metadata);
+<a name="125" href="#125">125</a>         } <strong class="jxr_keyword">finally</strong> {
+<a name="126" href="#126">126</a>             stream.close();
+<a name="127" href="#127">127</a>         }
+<a name="128" href="#128">128</a>         assertEquals(<span class="jxr_string">"Sample Word Document"</span>, metadata.get(Metadata.TITLE));
+<a name="129" href="#129">129</a>     }
+<a name="130" href="#130">130</a> 
+<a name="131" href="#131">131</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testEXCELExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="132" href="#132">132</a>         <strong class="jxr_keyword">final</strong> String expected = <span class="jxr_string">"Numbers and their Squares Number Square 1.0 "</span>
+<a name="133" href="#133">133</a>             + <span class="jxr_string">"1.0 2.0 4.0 3.0 9.0 4.0 16.0 5.0 25.0 6.0 36.0 7.0 49.0 8.0 "</span>
+<a name="134" href="#134">134</a>             + <span class="jxr_string">"64.0 9.0 81.0 10.0 100.0 11.0 121.0 12.0 144.0 13.0 169.0 "</span>
+<a name="135" href="#135">135</a>             + <span class="jxr_string">"14.0 196.0 15.0 225.0 Written and saved in Microsoft Excel "</span>
+<a name="136" href="#136">136</a>             + <span class="jxr_string">"X for Mac Service Release 1."</span>;
+<a name="137" href="#137">137</a>         File file = getTestFile(<span class="jxr_string">"testEXCEL.xls"</span>);
+<a name="138" href="#138">138</a>         String s1 = ParseUtils.getStringContent(file, tc);
+<a name="139" href="#139">139</a>         String s2 = ParseUtils.getStringContent(file, tc,
+<a name="140" href="#140">140</a>         <span class="jxr_string">"application/vnd.ms-excel"</span>);
+<a name="141" href="#141">141</a>         assertEquals(s1, s2);
+<a name="142" href="#142">142</a>         assertTrue(<span class="jxr_string">"Text does not contain '"</span> + expected + <span class="jxr_string">"'"</span>, s1
+<a name="143" href="#143">143</a>                 .contains(expected));
+<a name="144" href="#144">144</a>         Parser parser = tc.getParser(<span class="jxr_string">"application/vnd.ms-excel"</span>);
+<a name="145" href="#145">145</a>         Metadata metadata = <strong class="jxr_keyword">new</strong> Metadata();
+<a name="146" href="#146">146</a>         InputStream stream = <strong class="jxr_keyword">new</strong> FileInputStream(file);
+<a name="147" href="#147">147</a>         <strong class="jxr_keyword">try</strong> {
+<a name="148" href="#148">148</a>             parser.parse(stream, <strong class="jxr_keyword">new</strong> DefaultHandler(), metadata);
+<a name="149" href="#149">149</a>         } <strong class="jxr_keyword">finally</strong> {
+<a name="150" href="#150">150</a>             stream.close();
+<a name="151" href="#151">151</a>         }
+<a name="152" href="#152">152</a>         assertEquals(<span class="jxr_string">"Simple Excel document"</span>, metadata.get(Metadata.TITLE));
+<a name="153" href="#153">153</a>     }
+<a name="154" href="#154">154</a> 
+<a name="155" href="#155">155</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testOOExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="156" href="#156">156</a>         File file = getTestFile(<span class="jxr_string">"testOpenOffice2.odt"</span>);
+<a name="157" href="#157">157</a>         String s1 = ParseUtils.getStringContent(file, tc);
+<a name="158" href="#158">158</a>         String s2 = ParseUtils.getStringContent(file, tc,
+<a name="159" href="#159">159</a>         <span class="jxr_string">"application/vnd.oasis.opendocument.text"</span>);
+<a name="160" href="#160">160</a>         assertEquals(s1, s2);
+<a name="161" href="#161">161</a>     }
+<a name="162" href="#162">162</a> 
+<a name="163" href="#163">163</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testHTMLExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="164" href="#164">164</a>         File file = getTestFile(<span class="jxr_string">"testHTML.html"</span>);
+<a name="165" href="#165">165</a>         String s1 = ParseUtils.getStringContent(file, tc);
+<a name="166" href="#166">166</a>         String s2 = ParseUtils.getStringContent(file, tc, <span class="jxr_string">"text/html"</span>);
+<a name="167" href="#167">167</a>         assertEquals(s1, s2);
+<a name="168" href="#168">168</a> 
+<a name="169" href="#169">169</a>         Parser parser = tc.getParser(<span class="jxr_string">"text/html"</span>);
+<a name="170" href="#170">170</a>         assertNotNull(parser);
+<a name="171" href="#171">171</a>     }
+<a name="172" href="#172">172</a> 
+<a name="173" href="#173">173</a>     <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> testZipExtraction() <strong class="jxr_keyword">throws</strong> Exception {
+<a name="174" href="#174">174</a>         File zip = getTestFile(<span class="jxr_string">"test-documents.zip"</span>);
+<a name="175" href="#175">175</a>         List&lt;Parser&gt; parsers = ParseUtils.getParsersFromZip(zip, tc);
+<a name="176" href="#176">176</a>         List&lt;File&gt; zipFiles = Utils.unzip(<strong class="jxr_keyword">new</strong> FileInputStream(zip));
+<a name="177" href="#177">177</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; parsers.size(); i++) {
+<a name="178" href="#178">178</a>             Parser zipEntryParser = parsers.get(i);
+<a name="179" href="#179">179</a>             assertNotNull(zipEntryParser);
+<a name="180" href="#180">180</a>             <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> j = 0; j &lt; zipFiles.size(); j++) {
+<a name="181" href="#181">181</a>                 <em class="jxr_comment">/* FIXME: Doesn't work with the new Parser interface</em>
+<a name="182" href="#182">182</a> <em class="jxr_comment">                ParserConfig config = tc.getParserConfig(</em>
+<a name="183" href="#183">183</a> <em class="jxr_comment">                        zipEntryParser.getMimeType());</em>
+<a name="184" href="#184">184</a> <em class="jxr_comment">                Map&lt;String, Content&gt; contents = config.getContents();</em>
+<a name="185" href="#185">185</a> <em class="jxr_comment">                assertNotNull(contents);</em>
+<a name="186" href="#186">186</a> <em class="jxr_comment">                InputStream stream = new FileInputStream(zipFiles.get(j));</em>
+<a name="187" href="#187">187</a> <em class="jxr_comment">                try {</em>
+<a name="188" href="#188">188</a> <em class="jxr_comment">                    zipEntryParser.getContents(stream, contents);</em>
+<a name="189" href="#189">189</a> <em class="jxr_comment">                    assertNotNull(contents.get("fullText"));</em>
+<a name="190" href="#190">190</a> <em class="jxr_comment">                } finally {</em>
+<a name="191" href="#191">191</a> <em class="jxr_comment">                    stream.close();</em>
+<a name="192" href="#192">192</a> <em class="jxr_comment">                }</em>
+<a name="193" href="#193">193</a> <em class="jxr_comment">                */</em>
+<a name="194" href="#194">194</a>             }
+<a name="195" href="#195">195</a>         }
+<a name="196" href="#196">196</a>     }
+<a name="197" href="#197">197</a> 
+<a name="198" href="#198">198</a>     <strong class="jxr_keyword">private</strong> File getTestFile(String filename) {
+<a name="199" href="#199">199</a>         <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">new</strong> File(testFilesBaseDir, filename);
+<a name="200" href="#200">200</a>     }
+<a name="201" href="#201">201</a> 
+<a name="202" href="#202">202</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+