You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/10/31 22:49:59 UTC

svn commit: r1029500 [2/9] - in /tika/site: ./ publish/ publish/0.5/ publish/0.6/ publish/0.7/ publish/css/ src/site/ src/site/resources/ src/site/resources/css/

Modified: tika/site/publish/0.5/index.html
URL: http://svn.apache.org/viewvc/tika/site/publish/0.5/index.html?rev=1029500&r1=1029499&r2=1029500&view=diff
==============================================================================
--- tika/site/publish/0.5/index.html (original)
+++ tika/site/publish/0.5/index.html Sun Oct 31 21:49:59 2010
@@ -36,92 +36,62 @@
     </style>
     <link rel="icon" type="image/png" href="../tikaNoText16.png" />
     <script type="text/javascript">
-      function getBlank(form, stdValue) {
-        if (form.value == stdValue) {
-          form.value = '';
+      function selectProvider(form) {
+        provider = form.elements['searchProvider'].value;
+        if (provider == "any") {
+          if (Math.random() > 0.5) {
+            provider = "lucid";
+          } else {
+            provider = "sl";
+          }
+        }
+        if (provider == "lucid") {
+          form.action = "http://search.lucidimagination.com/p:tika";
+        } else if (provider == "sl") {
+          form.action = "http://search-lucene.com/tika";
         }
-        return true;
+        days = 90;
+        date = new Date();
+        date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
+        expires = "; expires=" + date.toGMTString();
+        document.cookie = "searchProvider=" + provider + expires + "; path=/";
       }
-      function getPrompt(form, stdValue) {
-        if (form.value == '') {
-          form.value = stdValue;
+      function initProvider() {
+        if (document.cookie.length>0) {
+          cStart=document.cookie.indexOf("searchProvider=");
+          if (cStart!=-1) {
+            cStart=cStart + "searchProvider=".length;
+            cEnd=document.cookie.indexOf(";", cStart);
+            if (cEnd==-1) {
+              cEnd=document.cookie.length;
+            }
+            provider = unescape(document.cookie.substring(cStart,cEnd));
+            document.forms['searchform'].elements['searchProvider'].value = provider;
+          }
         }
-        return true;
       }
     </script>
   </head>
-  <body class="composite">
-    <div id="banner">
-                  <a href="" id="bannerLeft"  title="Apache Tika"  >
+  <body onLoad="initProvider();">
+    <div id="body">
+      <div id="banner">
+                    <a href="" id="bannerLeft"  title="Apache Tika"  >
     
                                             <img src="../tika.png" alt="Apache Tika" />
     
             </a>
-                        <a href="www.apache.org" id="bannerRight"  title="Apache"  >
+                          <a href="http://www.apache.org/" id="bannerRight"  title="The Apache Software Foundation"  >
     
-                                    <img src="http://www.apache.org/images/feather-small.gif" alt="Apache" />
+                                            <img src="../asf-logo.gif" alt="The Apache Software Foundation" />
     
             </a>
-            <div class="clear">
-        <hr/>
-      </div>
-    </div>
-    <div id="search">
-      <script type="text/javascript">
-        function selectProvider(form) {
-          provider = form.elements['searchProvider'].value;
-          if (provider == "any") {
-            if (Math.random() > 0.5) {
-              provider = "lucid";
-            } else {
-              provider = "sl";
-            }
-          }
-
-          if (provider == "lucid") {
-            form.action = "http://search.lucidimagination.com/p:tika";
-          } else if (provider == "sl") {
-            form.action = "http://search-lucene.com/tika";
-          }
-
-          days = 90;
-          date = new Date();
-          date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
-          expires = "; expires=" + date.toGMTString();
-          document.cookie = "searchProvider=" + provider + expires + "; path=/";
-        }
-      </script>
-      <form action="http://search.lucidimagination.com/p:tika" method="get" id="searchform">
-        <input type="text" id="query" name="q" size="30" onFocus="getBlank (this, 'Search with Apache Solr');" value="Search with Apache Solr"></input>
-        <input type="submit" value="Search" name="Search" onclick="selectProvider(this.form)"/>
-        @
-        <select name="searchProvider" id="searchProvider">
-          <option value="any">select provider</option>
-          <option value="lucid">Lucid Find</option>
-          <option value="sl">Search-Lucene</option>
-        </select>
-        <script type="text/javascript">
-          if (document.cookie.length>0) {
-            cStart=document.cookie.indexOf("searchProvider=");
-            if (cStart!=-1) {
-              cStart=cStart + "searchProvider=".length;
-              cEnd=document.cookie.indexOf(";", cStart);
-              if (cEnd==-1) {
-                cEnd=document.cookie.length;
-              }
-              provider = unescape(document.cookie.substring(cStart,cEnd));
-              document.forms['searchform'].elements['searchProvider'].value = provider;
-            }
-          }
-        </script>
-      </form>
-      <div class="clear">
-        <hr/>
+            </div>
+      <div id="content">
+        <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Apache Tika 0.5<a name="Apache_Tika_0.5"></a></h2><p>The most notable changes in Tika 0.5 ove
 r the previous release are:</p><ul><li>Improved RDF/OWL mime detection using both MIME magic as well as pattern matching. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-309">TIKA-309</a>)</li><li>An org.apache.tika.Tika facade class has been added to simplify common text extraction and type detection use cases. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-269">TIKA-269</a>)</li><li>A new parse context argument was added to the Parser.parse() method. This context map can be used to pass things like a delegate parser or other settings to the parsing process. The previous parse() method signature has been deprecated and will be removed in Tika 1.0. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>)</li><li>A simple ngram-based language detection mechanism has been added along with predefined language profiles for 18 languages. (<a class="externalLink" href="https://issues.apache.or
 g/jira/browse/TIKA-209">TIKA-209</a>)</li><li>The media type registry in Tika was synchronized with the MIME type configuration in the Apache HTTP Server. Tika now knows about 1274 different media types and can detect 672 of those using 927 file extension and 280 magic byte patterns. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-285">TIKA-285</a>)</li><li>Tika now uses the Apache PDFBox version 0.8.0-incubating for parsing PDF documents. This version is notably better than the 0.7.3 release used earlier. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-158">TIKA-158</a>)</li></ul><p>The following people have contributed to Tika 0.5 by submitting or commenting on the issues resolved in this release:</p><ul><li>Alex Baranov</li><li>Bart Hanssens</li><li>Benson Margulies</li><li>Chris A. Mattmann</li><li>Daan de Wit</li><li>Erik Hetzner</li><li>Frank Hellwig</li><li>Jeff Cadow</li><li>Joachim Zittmayr</li><li>Jukka Zitting </
 li><li>Julien Nioche</li><li>Ken Krugler</li><li>Maxim Valyanskiy</li><li>MRIT64</li><li>Paul Borgermans</li><li>Piotr B.</li><li>Robert Newson</li><li>Sascha Szott</li><li>Ted Dunning</li><li>Thilo Goetz</li><li>Uwe Schindler</li><li>Yuan-Fang Li</li></ul><p>See <a class="externalLink" href="http://tinyurl.com/yl9prwp">http://tinyurl.com/yl9prwp</a> for more details on these contributions.</p></div>
       </div>
-    </div>
-    <div id="leftColumn">
-      <div id="navcolumn">
-                 <h5>Apache Tika</h5>
+      <div id="sidebar">
+        <div id="navigation">
+                    <h5>Apache Tika</h5>
             <ul>
               
     <li class="none">
@@ -147,7 +117,7 @@
               <h5>Documentation</h5>
             <ul>
               
-          
+                
                     
                   
                   
@@ -155,35 +125,9 @@
                   
                   
               
-        <li class="expanded">
-                    <a href="../0.7/index.html">Tika 0.7</a>
-                  <ul>
-                  
-    <li class="none">
-                    <a href="../0.7/gettingstarted.html">Getting Started</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/formats.html">Supported Formats</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser.html">Parser API</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser_guide.html">Parser 5min Quick Start Guide</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/detection.html">Content and Language Detection</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/api/">API Documentation</a>
-          </li>
-              </ul>
-        </li>
+        <li class="collapsed">
+                    <a href="../0.7/index.html">Apache Tika 0.7</a>
+                </li>
               
                 
                     
@@ -192,7 +136,7 @@
                   
               
         <li class="collapsed">
-                    <a href="../0.6/index.html">Tika 0.6</a>
+                    <a href="../0.6/index.html">Apache Tika 0.6</a>
                 </li>
               
                 
@@ -202,7 +146,7 @@
                   
               
             <li class="expanded">
-              <strong>Tika 0.5</strong>
+              <strong>Apache Tika 0.5</strong>
                 <ul>
                   
     <li class="none">
@@ -223,33 +167,44 @@
               </ul>
         </li>
           </ul>
-            </div>
-      <div id="bookpromo">  
-        <a href="http://manning.com/mattmann/" title="Tika in Action"
-           ><img src="../mattmann_cover150.jpg"
-                 border="0" width="150" height="186"/></a>
-      </div>
-    </div>
-    <div id="bodyColumn">
-      <div id="contentBox">
-        <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Apache Tika 0.5<a name="Apache_Tika_0.5"></a></h2><p>The most notable changes in Tika 0.5 ove
 r the previous release are:</p><ul><li>Improved RDF/OWL mime detection using both MIME magic as well as pattern matching. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-309">TIKA-309</a>)</li><li>An org.apache.tika.Tika facade class has been added to simplify common text extraction and type detection use cases. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-269">TIKA-269</a>)</li><li>A new parse context argument was added to the Parser.parse() method. This context map can be used to pass things like a delegate parser or other settings to the parsing process. The previous parse() method signature has been deprecated and will be removed in Tika 1.0. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-275">TIKA-275</a>)</li><li>A simple ngram-based language detection mechanism has been added along with predefined language profiles for 18 languages. (<a class="externalLink" href="https://issues.apache.or
 g/jira/browse/TIKA-209">TIKA-209</a>)</li><li>The media type registry in Tika was synchronized with the MIME type configuration in the Apache HTTP Server. Tika now knows about 1274 different media types and can detect 672 of those using 927 file extension and 280 magic byte patterns. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-285">TIKA-285</a>)</li><li>Tika now uses the Apache PDFBox version 0.8.0-incubating for parsing PDF documents. This version is notably better than the 0.7.3 release used earlier. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-158">TIKA-158</a>)</li></ul><p>The following people have contributed to Tika 0.5 by submitting or commenting on the issues resolved in this release:</p><ul><li>Alex Baranov</li><li>Bart Hanssens</li><li>Benson Margulies</li><li>Chris A. Mattmann</li><li>Daan de Wit</li><li>Erik Hetzner</li><li>Frank Hellwig</li><li>Jeff Cadow</li><li>Joachim Zittmayr</li><li>Jukka Zitting </
 li><li>Julien Nioche</li><li>Ken Krugler</li><li>Maxim Valyanskiy</li><li>MRIT64</li><li>Paul Borgermans</li><li>Piotr B.</li><li>Robert Newson</li><li>Sascha Szott</li><li>Ted Dunning</li><li>Thilo Goetz</li><li>Uwe Schindler</li><li>Yuan-Fang Li</li></ul><p>See <a class="externalLink" href="http://tinyurl.com/yl9prwp">http://tinyurl.com/yl9prwp</a> for more details on these contributions.</p></div>
+      
+          <div id="search">
+            <h5>Search with Apache Solr</h5>
+            <form action="http://search.lucidimagination.com/p:tika"
+                  method="get" id="searchform">
+              <input type="text" id="query" name="q"/>
+              <select name="searchProvider" id="searchProvider">
+                <option value="any">provider</option>
+                <option value="lucid">Lucid Find</option>
+                <option value="sl">Search-Lucene</option>
+              </select>
+              <input type="submit" id="submit" value="Search" name="Search"
+                     onclick="selectProvider(this.form)"/>
+            </form>
+          </div>
+
+          <div id="bookpromo">
+            <h5>Books about Tika</h5>
+            <p>
+              <a href="http://manning.com/mattmann/" title="Tika in Action"
+                ><img src="../mattmann_cover150.jpg"
+                      width="150" height="186"/></a>
+            </p>
+          </div>
+        </div>
       </div>
-    </div>
-    <div class="clear">
-      <hr/>
-    </div>
-    <div id="footer">
-      <p>
-        Copyright 2010
-        <a href="http://www.apache.org/">The Apache Software Foundation</a>.
-        Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
-        Search powered by <a href="http://www.lucidimagination.com">Lucid Imagination</a> & <a href="http://sematext.com">Sematext</a>.
-        <br/>
-        Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
-        Tika project logo are trademarks of The Apache Software Foundation.
-      </p>
-      <div class="clear">
-        <hr/>
+      <div id="footer">
+        <p>
+          Copyright &#169; 2010
+          <a href="http://www.apache.org/">The Apache Software Foundation</a>.
+          Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
+          Search powered by
+          <a href="http://www.lucidimagination.com">Lucid Imagination</a>
+          and <a href="http://sematext.com">Sematext</a>.
+          <br/>
+          Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
+          Tika project logo are trademarks of The Apache Software Foundation.
+        </p>
       </div>
     </div>
   </body>

Modified: tika/site/publish/0.6/formats.html
URL: http://svn.apache.org/viewvc/tika/site/publish/0.6/formats.html?rev=1029500&r1=1029499&r2=1029500&view=diff
==============================================================================
--- tika/site/publish/0.6/formats.html (original)
+++ tika/site/publish/0.6/formats.html Sun Oct 31 21:49:59 2010
@@ -36,92 +36,62 @@
     </style>
     <link rel="icon" type="image/png" href="../tikaNoText16.png" />
     <script type="text/javascript">
-      function getBlank(form, stdValue) {
-        if (form.value == stdValue) {
-          form.value = '';
+      function selectProvider(form) {
+        provider = form.elements['searchProvider'].value;
+        if (provider == "any") {
+          if (Math.random() > 0.5) {
+            provider = "lucid";
+          } else {
+            provider = "sl";
+          }
+        }
+        if (provider == "lucid") {
+          form.action = "http://search.lucidimagination.com/p:tika";
+        } else if (provider == "sl") {
+          form.action = "http://search-lucene.com/tika";
         }
-        return true;
+        days = 90;
+        date = new Date();
+        date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
+        expires = "; expires=" + date.toGMTString();
+        document.cookie = "searchProvider=" + provider + expires + "; path=/";
       }
-      function getPrompt(form, stdValue) {
-        if (form.value == '') {
-          form.value = stdValue;
+      function initProvider() {
+        if (document.cookie.length>0) {
+          cStart=document.cookie.indexOf("searchProvider=");
+          if (cStart!=-1) {
+            cStart=cStart + "searchProvider=".length;
+            cEnd=document.cookie.indexOf(";", cStart);
+            if (cEnd==-1) {
+              cEnd=document.cookie.length;
+            }
+            provider = unescape(document.cookie.substring(cStart,cEnd));
+            document.forms['searchform'].elements['searchProvider'].value = provider;
+          }
         }
-        return true;
       }
     </script>
   </head>
-  <body class="composite">
-    <div id="banner">
-                  <a href="" id="bannerLeft"  title="Apache Tika"  >
+  <body onLoad="initProvider();">
+    <div id="body">
+      <div id="banner">
+                    <a href="" id="bannerLeft"  title="Apache Tika"  >
     
                                             <img src="../tika.png" alt="Apache Tika" />
     
             </a>
-                        <a href="www.apache.org" id="bannerRight"  title="Apache"  >
+                          <a href="http://www.apache.org/" id="bannerRight"  title="The Apache Software Foundation"  >
     
-                                    <img src="http://www.apache.org/images/feather-small.gif" alt="Apache" />
+                                            <img src="../asf-logo.gif" alt="The Apache Software Foundation" />
     
             </a>
-            <div class="clear">
-        <hr/>
-      </div>
-    </div>
-    <div id="search">
-      <script type="text/javascript">
-        function selectProvider(form) {
-          provider = form.elements['searchProvider'].value;
-          if (provider == "any") {
-            if (Math.random() > 0.5) {
-              provider = "lucid";
-            } else {
-              provider = "sl";
-            }
-          }
-
-          if (provider == "lucid") {
-            form.action = "http://search.lucidimagination.com/p:tika";
-          } else if (provider == "sl") {
-            form.action = "http://search-lucene.com/tika";
-          }
-
-          days = 90;
-          date = new Date();
-          date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
-          expires = "; expires=" + date.toGMTString();
-          document.cookie = "searchProvider=" + provider + expires + "; path=/";
-        }
-      </script>
-      <form action="http://search.lucidimagination.com/p:tika" method="get" id="searchform">
-        <input type="text" id="query" name="q" size="30" onFocus="getBlank (this, 'Search with Apache Solr');" value="Search with Apache Solr"></input>
-        <input type="submit" value="Search" name="Search" onclick="selectProvider(this.form)"/>
-        @
-        <select name="searchProvider" id="searchProvider">
-          <option value="any">select provider</option>
-          <option value="lucid">Lucid Find</option>
-          <option value="sl">Search-Lucene</option>
-        </select>
-        <script type="text/javascript">
-          if (document.cookie.length>0) {
-            cStart=document.cookie.indexOf("searchProvider=");
-            if (cStart!=-1) {
-              cStart=cStart + "searchProvider=".length;
-              cEnd=document.cookie.indexOf(";", cStart);
-              if (cEnd==-1) {
-                cEnd=document.cookie.length;
-              }
-              provider = unescape(document.cookie.substring(cStart,cEnd));
-              document.forms['searchform'].elements['searchProvider'].value = provider;
-            }
-          }
-        </script>
-      </form>
-      <div class="clear">
-        <hr/>
+            </div>
+      <div id="content">
+        <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Supported Document Formats<a name="Supported_Document_Formats"></a></h2><p>This page lists al
 l the document formats supported by Apache Tika 0.6. Follow the links to the various parser class javadocs for more detailed information about each document format and how it is parsed by Tika.</p><ul><li><a href="#Supported_Document_Formats">Supported Document Formats</a><ul><li><a href="#HyperText_Markup_Language">HyperText Markup Language</a></li><li><a href="#XML_and_derived_formats">XML and derived formats</a></li><li><a href="#Microsoft_Office_document_formats">Microsoft Office document formats</a></li><li><a href="#OpenDocument_Format">OpenDocument Format</a></li><li><a href="#Portable_Document_Format">Portable Document Format</a></li><li><a href="#Electronic_Publication_Format">Electronic Publication Format</a></li><li><a href="#Rich_Text_Format">Rich Text Format</a></li><li><a href="#Compression_and_packaging_formats">Compression and packaging formats</a></li><li><a href="#Text_formats">Text formats</a></li><li><a href="#Audio_formats">Audio formats</a></li><li><a h
 ref="#Image_formats">Image formats</a></li><li><a href="#Video_formats">Video formats</a></li><li><a href="#Java_class_files_and_archives">Java class files and archives</a></li><li><a href="#The_mbox_format">The mbox format</a></li></ul></li></ul><div class="section"><h3><a name="HyperText_Markup_Language">HyperText Markup Language</a><a name="HyperText_Markup_Language"></a></h3><p>The HyperText Markup Language (HTML) is the lingua franca of the web. Tika uses the <a class="externalLink" href="http://home.ccil.org/~cowan/XML/tagsoup/">TagSoup</a> library to support virtually any kind of HTML found on the web. The output from the <a href="./api/org/apache/tika/parser/html/HtmlParser.html">HtmlParser</a> class is guaranteed to be well-formed and valid XHTML, and various heuristics are used to prevent things like inline scripts from cluttering the extracted text content.</p></div><div class="section"><h3><a name="XML_and_derived_formats">XML and derived formats</a><a name="XML_
 and_derived_formats"></a></h3><p>The Extensible Markup Language (XML) format is a generic format that can be used for all kinds of content. Tika has custom parsers for some widely used XML vocabularies like XHTML, OOXML and ODF, but the default <a href="./api/org/apache/tika/parser/xml/DcXMLParser.html">DcXMLParser</a> class simply extracts the text content of the document and ignores any XML structure. The only exception to this rule are Dublin Core metadata elements that are used for the document metadata.</p></div><div class="section"><h3><a name="Microsoft_Office_document_formats">Microsoft Office document formats</a><a name="Microsoft_Office_document_formats"></a></h3><p>Microsoft Office and some related applications produce documents in the generic OLE 2 Compound Document and Office Open XML (OOXML) formats. The older OLE 2 format was introduced in Microsoft Office version 97 and was the default format until Office version 2007 and the new XML-based OOXML format. The <
 a href="./api/org/apache/tika/parser/microsoft/OfficeParser.html">OfficeParser</a> and <a href="./api/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.html">OOXMLParser</a> classes use <a class="externalLink" href="http://poi.apache.org/">Apache POI</a> libraries to support text and metadata extraction from both OLE2 and OOXML documents.</p></div><div class="section"><h3><a name="OpenDocument_Format">OpenDocument Format</a><a name="OpenDocument_Format"></a></h3><p>The OpenDocument format (ODF) is used most notably as the default format of the OpenOffice.org office suite. The <a href="./api/org/apache/tika/parser/odf/OpenDocumentParser.html">OpenDocumentParser</a> class supports this format and the earlier OpenOffice 1.0 format on which ODF is based.</p></div><div class="section"><h3><a name="Portable_Document_Format">Portable Document Format</a><a name="Portable_Document_Format"></a></h3><p>The <a href="./api/org/apache/tika/parser/pdf/PDFParser.html">PDFParser</a> class p
 arsers Portable Document Format (PDF) documents using the <a class="externalLink" href="http://pdfbox.apache.org/">Apache PDFBox</a> library.</p></div><div class="section"><h3><a name="Electronic_Publication_Format">Electronic Publication Format</a><a name="Electronic_Publication_Format"></a></h3><p>The <a href="./api/org/apache/tika/parser/epub/EpubParser.html">EpubParser</a> class supports the Electronic Publication Format (EPUB) used for many digital books.</p></div><div class="section"><h3><a name="Rich_Text_Format">Rich Text Format</a><a name="Rich_Text_Format"></a></h3><p>The <a href="./api/org/apache/tika/parser/rtf/RTFParser.html">RTFParser</a> class uses the standard javax.swing.text.rtf feature to extract text content from Rich Text Format (RTF) documents.</p></div><div class="section"><h3><a name="Compression_and_packaging_formats">Compression and packaging formats</a><a name="Compression_and_packaging_formats"></a></h3><p>Tika uses the <a class="externalLink" hre
 f="http://commons.apache.org/compress/">Commons Compress</a> library to support various compression and packaging formats. The <a href="./api/org/apache/tika/parser/pkg/PackageParser.html">PackageParser</a> class and its subclasses first parse the top level compression or packaging format and then pass the unpacked document streams to a second parsing stage using the parser instance specified in the parse context.</p></div><div class="section"><h3><a name="Text_formats">Text formats</a><a name="Text_formats"></a></h3><p>Extracting text content from plain text files seems like a simple task until you start thinking of all the possible character encodings. The <a href="./api/org/apache/tika/parser/txt/TXTParser.html">TXTParser</a> class uses encoding detection code from the <a class="externalLink" href="http://site.icu-project.org/">ICU</a> project to automatically detect the character encoding of a text document.</p></div><div class="section"><h3><a name="Audio_formats">Audio
  formats</a><a name="Audio_formats"></a></h3><p>Tika can detect several common audio formats and extract metadata from them. Even text extraction is supported for some audio files that contain lyrics or other textual content. The <a href="./api/org/apache/tika/parser/audio/AudioParser.html">AudioParser</a> and <a href="./api/org/apache/tika/parser/audio/MidiParser.html">MidiParser</a> classes use standard javax.sound features to process simple audio formats, and the <a href="./api/org/apache/tika/parser/mp3/Mp3Parser.html">Mp3Parser</a> class adds support for the widely used MP3 format.</p></div><div class="section"><h3><a name="Image_formats">Image formats</a><a name="Image_formats"></a></h3><p>The <a href="./api/org/apache/tika/parser/image/ImageParser.html">ImageParser</a> class uses the standard javax.imageio feature to extract simple metadata from image formats supported by the Java platform. More complex image metadata is available through the <a href="./api/org/apache
 /tika/parser/jpeg/JpegParser.html">JpegParser</a> class that uses the metadata-extractor library to supports Exif metadata extraction from Jpeg images.</p></div><div class="section"><h3><a name="Video_formats">Video formats</a><a name="Video_formats"></a></h3><p>Currently Tika only supports the Flash video format using a simple parsing algorithm implemented in the <a href="./api/org/apache/tika/parser/flv/FLVParser">FLVParser</a> class.</p></div><div class="section"><h3><a name="Java_class_files_and_archives">Java class files and archives</a><a name="Java_class_files_and_archives"></a></h3><p>The <a href="./api/org/apache/tika/parser/asm/ClassParser">ClassParser</a> class extracts class names and method signatures from Java class files, and the <a href="./api/org/apache/tika/parser/pkg/ZipParser.html">ZipParser</a> class supports also jar archives.</p></div><div class="section"><h3><a name="The_mbox_format">The mbox format</a><a name="The_mbox_format"></a></h3><p>The <a href
 ="./api/org/apache/tika/parser/mbox/MboxParser.html">MboxParser</a> can extract email messages from the mbox format used by many email archives and Unix-style mailboxes.</p></div></div>
       </div>
-    </div>
-    <div id="leftColumn">
-      <div id="navcolumn">
-                 <h5>Apache Tika</h5>
+      <div id="sidebar">
+        <div id="navigation">
+                    <h5>Apache Tika</h5>
             <ul>
               
     <li class="none">
@@ -147,7 +117,7 @@
               <h5>Documentation</h5>
             <ul>
               
-          
+                
                     
                   
                   
@@ -155,35 +125,9 @@
                   
                   
               
-        <li class="expanded">
-                    <a href="../0.7/index.html">Tika 0.7</a>
-                  <ul>
-                  
-    <li class="none">
-                    <a href="../0.7/gettingstarted.html">Getting Started</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/formats.html">Supported Formats</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser.html">Parser API</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser_guide.html">Parser 5min Quick Start Guide</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/detection.html">Content and Language Detection</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/api/">API Documentation</a>
-          </li>
-              </ul>
-        </li>
+        <li class="collapsed">
+                    <a href="../0.7/index.html">Apache Tika 0.7</a>
+                </li>
               
                 
                     
@@ -192,7 +136,7 @@
                   
               
             <li class="expanded">
-                    <a href="../0.6/index.html">Tika 0.6</a>
+                    <a href="../0.6/index.html">Apache Tika 0.6</a>
                   <ul>
                   
     <li class="none">
@@ -220,36 +164,47 @@
                   
               
         <li class="collapsed">
-                    <a href="../0.5/index.html">Tika 0.5</a>
+                    <a href="../0.5/index.html">Apache Tika 0.5</a>
                 </li>
           </ul>
-            </div>
-      <div id="bookpromo">  
-        <a href="http://manning.com/mattmann/" title="Tika in Action"
-           ><img src="../mattmann_cover150.jpg"
-                 border="0" width="150" height="186"/></a>
-      </div>
-    </div>
-    <div id="bodyColumn">
-      <div id="contentBox">
-        <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Supported Document Formats<a name="Supported_Document_Formats"></a></h2><p>This page lists al
 l the document formats supported by Apache Tika 0.6. Follow the links to the various parser class javadocs for more detailed information about each document format and how it is parsed by Tika.</p><ul><li><a href="#Supported_Document_Formats">Supported Document Formats</a><ul><li><a href="#HyperText_Markup_Language">HyperText Markup Language</a></li><li><a href="#XML_and_derived_formats">XML and derived formats</a></li><li><a href="#Microsoft_Office_document_formats">Microsoft Office document formats</a></li><li><a href="#OpenDocument_Format">OpenDocument Format</a></li><li><a href="#Portable_Document_Format">Portable Document Format</a></li><li><a href="#Electronic_Publication_Format">Electronic Publication Format</a></li><li><a href="#Rich_Text_Format">Rich Text Format</a></li><li><a href="#Compression_and_packaging_formats">Compression and packaging formats</a></li><li><a href="#Text_formats">Text formats</a></li><li><a href="#Audio_formats">Audio formats</a></li><li><a h
 ref="#Image_formats">Image formats</a></li><li><a href="#Video_formats">Video formats</a></li><li><a href="#Java_class_files_and_archives">Java class files and archives</a></li><li><a href="#The_mbox_format">The mbox format</a></li></ul></li></ul><div class="section"><h3><a name="HyperText_Markup_Language">HyperText Markup Language</a><a name="HyperText_Markup_Language"></a></h3><p>The HyperText Markup Language (HTML) is the lingua franca of the web. Tika uses the <a class="externalLink" href="http://home.ccil.org/~cowan/XML/tagsoup/">TagSoup</a> library to support virtually any kind of HTML found on the web. The output from the <a href="./api/org/apache/tika/parser/html/HtmlParser.html">HtmlParser</a> class is guaranteed to be well-formed and valid XHTML, and various heuristics are used to prevent things like inline scripts from cluttering the extracted text content.</p></div><div class="section"><h3><a name="XML_and_derived_formats">XML and derived formats</a><a name="XML_
 and_derived_formats"></a></h3><p>The Extensible Markup Language (XML) format is a generic format that can be used for all kinds of content. Tika has custom parsers for some widely used XML vocabularies like XHTML, OOXML and ODF, but the default <a href="./api/org/apache/tika/parser/xml/DcXMLParser.html">DcXMLParser</a> class simply extracts the text content of the document and ignores any XML structure. The only exception to this rule are Dublin Core metadata elements that are used for the document metadata.</p></div><div class="section"><h3><a name="Microsoft_Office_document_formats">Microsoft Office document formats</a><a name="Microsoft_Office_document_formats"></a></h3><p>Microsoft Office and some related applications produce documents in the generic OLE 2 Compound Document and Office Open XML (OOXML) formats. The older OLE 2 format was introduced in Microsoft Office version 97 and was the default format until Office version 2007 and the new XML-based OOXML format. The <
 a href="./api/org/apache/tika/parser/microsoft/OfficeParser.html">OfficeParser</a> and <a href="./api/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.html">OOXMLParser</a> classes use <a class="externalLink" href="http://poi.apache.org/">Apache POI</a> libraries to support text and metadata extraction from both OLE2 and OOXML documents.</p></div><div class="section"><h3><a name="OpenDocument_Format">OpenDocument Format</a><a name="OpenDocument_Format"></a></h3><p>The OpenDocument format (ODF) is used most notably as the default format of the OpenOffice.org office suite. The <a href="./api/org/apache/tika/parser/odf/OpenDocumentParser.html">OpenDocumentParser</a> class supports this format and the earlier OpenOffice 1.0 format on which ODF is based.</p></div><div class="section"><h3><a name="Portable_Document_Format">Portable Document Format</a><a name="Portable_Document_Format"></a></h3><p>The <a href="./api/org/apache/tika/parser/pdf/PDFParser.html">PDFParser</a> class p
 arsers Portable Document Format (PDF) documents using the <a class="externalLink" href="http://pdfbox.apache.org/">Apache PDFBox</a> library.</p></div><div class="section"><h3><a name="Electronic_Publication_Format">Electronic Publication Format</a><a name="Electronic_Publication_Format"></a></h3><p>The <a href="./api/org/apache/tika/parser/epub/EpubParser.html">EpubParser</a> class supports the Electronic Publication Format (EPUB) used for many digital books.</p></div><div class="section"><h3><a name="Rich_Text_Format">Rich Text Format</a><a name="Rich_Text_Format"></a></h3><p>The <a href="./api/org/apache/tika/parser/rtf/RTFParser.html">RTFParser</a> class uses the standard javax.swing.text.rtf feature to extract text content from Rich Text Format (RTF) documents.</p></div><div class="section"><h3><a name="Compression_and_packaging_formats">Compression and packaging formats</a><a name="Compression_and_packaging_formats"></a></h3><p>Tika uses the <a class="externalLink" hre
 f="http://commons.apache.org/compress/">Commons Compress</a> library to support various compression and packaging formats. The <a href="./api/org/apache/tika/parser/pkg/PackageParser.html">PackageParser</a> class and its subclasses first parse the top level compression or packaging format and then pass the unpacked document streams to a second parsing stage using the parser instance specified in the parse context.</p></div><div class="section"><h3><a name="Text_formats">Text formats</a><a name="Text_formats"></a></h3><p>Extracting text content from plain text files seems like a simple task until you start thinking of all the possible character encodings. The <a href="./api/org/apache/tika/parser/txt/TXTParser.html">TXTParser</a> class uses encoding detection code from the <a class="externalLink" href="http://site.icu-project.org/">ICU</a> project to automatically detect the character encoding of a text document.</p></div><div class="section"><h3><a name="Audio_formats">Audio
  formats</a><a name="Audio_formats"></a></h3><p>Tika can detect several common audio formats and extract metadata from them. Even text extraction is supported for some audio files that contain lyrics or other textual content. The <a href="./api/org/apache/tika/parser/audio/AudioParser.html">AudioParser</a> and <a href="./api/org/apache/tika/parser/audio/MidiParser.html">MidiParser</a> classes use standard javax.sound features to process simple audio formats, and the <a href="./api/org/apache/tika/parser/mp3/Mp3Parser.html">Mp3Parser</a> class adds support for the widely used MP3 format.</p></div><div class="section"><h3><a name="Image_formats">Image formats</a><a name="Image_formats"></a></h3><p>The <a href="./api/org/apache/tika/parser/image/ImageParser.html">ImageParser</a> class uses the standard javax.imageio feature to extract simple metadata from image formats supported by the Java platform. More complex image metadata is available through the <a href="./api/org/apache
 /tika/parser/jpeg/JpegParser.html">JpegParser</a> class that uses the metadata-extractor library to supports Exif metadata extraction from Jpeg images.</p></div><div class="section"><h3><a name="Video_formats">Video formats</a><a name="Video_formats"></a></h3><p>Currently Tika only supports the Flash video format using a simple parsing algorithm implemented in the <a href="./api/org/apache/tika/parser/flv/FLVParser">FLVParser</a> class.</p></div><div class="section"><h3><a name="Java_class_files_and_archives">Java class files and archives</a><a name="Java_class_files_and_archives"></a></h3><p>The <a href="./api/org/apache/tika/parser/asm/ClassParser">ClassParser</a> class extracts class names and method signatures from Java class files, and the <a href="./api/org/apache/tika/parser/pkg/ZipParser.html">ZipParser</a> class supports also jar archives.</p></div><div class="section"><h3><a name="The_mbox_format">The mbox format</a><a name="The_mbox_format"></a></h3><p>The <a href
 ="./api/org/apache/tika/parser/mbox/MboxParser.html">MboxParser</a> can extract email messages from the mbox format used by many email archives and Unix-style mailboxes.</p></div></div>
+      
+          <div id="search">
+            <h5>Search with Apache Solr</h5>
+            <form action="http://search.lucidimagination.com/p:tika"
+                  method="get" id="searchform">
+              <input type="text" id="query" name="q"/>
+              <select name="searchProvider" id="searchProvider">
+                <option value="any">provider</option>
+                <option value="lucid">Lucid Find</option>
+                <option value="sl">Search-Lucene</option>
+              </select>
+              <input type="submit" id="submit" value="Search" name="Search"
+                     onclick="selectProvider(this.form)"/>
+            </form>
+          </div>
+
+          <div id="bookpromo">
+            <h5>Books about Tika</h5>
+            <p>
+              <a href="http://manning.com/mattmann/" title="Tika in Action"
+                ><img src="../mattmann_cover150.jpg"
+                      width="150" height="186"/></a>
+            </p>
+          </div>
+        </div>
       </div>
-    </div>
-    <div class="clear">
-      <hr/>
-    </div>
-    <div id="footer">
-      <p>
-        Copyright 2010
-        <a href="http://www.apache.org/">The Apache Software Foundation</a>.
-        Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
-        Search powered by <a href="http://www.lucidimagination.com">Lucid Imagination</a> & <a href="http://sematext.com">Sematext</a>.
-        <br/>
-        Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
-        Tika project logo are trademarks of The Apache Software Foundation.
-      </p>
-      <div class="clear">
-        <hr/>
+      <div id="footer">
+        <p>
+          Copyright &#169; 2010
+          <a href="http://www.apache.org/">The Apache Software Foundation</a>.
+          Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
+          Search powered by
+          <a href="http://www.lucidimagination.com">Lucid Imagination</a>
+          and <a href="http://sematext.com">Sematext</a>.
+          <br/>
+          Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
+          Tika project logo are trademarks of The Apache Software Foundation.
+        </p>
       </div>
     </div>
   </body>

Modified: tika/site/publish/0.6/gettingstarted.html
URL: http://svn.apache.org/viewvc/tika/site/publish/0.6/gettingstarted.html?rev=1029500&r1=1029499&r2=1029500&view=diff
==============================================================================
--- tika/site/publish/0.6/gettingstarted.html (original)
+++ tika/site/publish/0.6/gettingstarted.html Sun Oct 31 21:49:59 2010
@@ -36,202 +36,57 @@
     </style>
     <link rel="icon" type="image/png" href="../tikaNoText16.png" />
     <script type="text/javascript">
-      function getBlank(form, stdValue) {
-        if (form.value == stdValue) {
-          form.value = '';
+      function selectProvider(form) {
+        provider = form.elements['searchProvider'].value;
+        if (provider == "any") {
+          if (Math.random() > 0.5) {
+            provider = "lucid";
+          } else {
+            provider = "sl";
+          }
+        }
+        if (provider == "lucid") {
+          form.action = "http://search.lucidimagination.com/p:tika";
+        } else if (provider == "sl") {
+          form.action = "http://search-lucene.com/tika";
         }
-        return true;
+        days = 90;
+        date = new Date();
+        date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
+        expires = "; expires=" + date.toGMTString();
+        document.cookie = "searchProvider=" + provider + expires + "; path=/";
       }
-      function getPrompt(form, stdValue) {
-        if (form.value == '') {
-          form.value = stdValue;
+      function initProvider() {
+        if (document.cookie.length>0) {
+          cStart=document.cookie.indexOf("searchProvider=");
+          if (cStart!=-1) {
+            cStart=cStart + "searchProvider=".length;
+            cEnd=document.cookie.indexOf(";", cStart);
+            if (cEnd==-1) {
+              cEnd=document.cookie.length;
+            }
+            provider = unescape(document.cookie.substring(cStart,cEnd));
+            document.forms['searchform'].elements['searchProvider'].value = provider;
+          }
         }
-        return true;
       }
     </script>
   </head>
-  <body class="composite">
-    <div id="banner">
-                  <a href="" id="bannerLeft"  title="Apache Tika"  >
+  <body onLoad="initProvider();">
+    <div id="body">
+      <div id="banner">
+                    <a href="" id="bannerLeft"  title="Apache Tika"  >
     
                                             <img src="../tika.png" alt="Apache Tika" />
     
             </a>
-                        <a href="www.apache.org" id="bannerRight"  title="Apache"  >
+                          <a href="http://www.apache.org/" id="bannerRight"  title="The Apache Software Foundation"  >
     
-                                    <img src="http://www.apache.org/images/feather-small.gif" alt="Apache" />
+                                            <img src="../asf-logo.gif" alt="The Apache Software Foundation" />
     
             </a>
-            <div class="clear">
-        <hr/>
-      </div>
-    </div>
-    <div id="search">
-      <script type="text/javascript">
-        function selectProvider(form) {
-          provider = form.elements['searchProvider'].value;
-          if (provider == "any") {
-            if (Math.random() > 0.5) {
-              provider = "lucid";
-            } else {
-              provider = "sl";
-            }
-          }
-
-          if (provider == "lucid") {
-            form.action = "http://search.lucidimagination.com/p:tika";
-          } else if (provider == "sl") {
-            form.action = "http://search-lucene.com/tika";
-          }
-
-          days = 90;
-          date = new Date();
-          date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
-          expires = "; expires=" + date.toGMTString();
-          document.cookie = "searchProvider=" + provider + expires + "; path=/";
-        }
-      </script>
-      <form action="http://search.lucidimagination.com/p:tika" method="get" id="searchform">
-        <input type="text" id="query" name="q" size="30" onFocus="getBlank (this, 'Search with Apache Solr');" value="Search with Apache Solr"></input>
-        <input type="submit" value="Search" name="Search" onclick="selectProvider(this.form)"/>
-        @
-        <select name="searchProvider" id="searchProvider">
-          <option value="any">select provider</option>
-          <option value="lucid">Lucid Find</option>
-          <option value="sl">Search-Lucene</option>
-        </select>
-        <script type="text/javascript">
-          if (document.cookie.length>0) {
-            cStart=document.cookie.indexOf("searchProvider=");
-            if (cStart!=-1) {
-              cStart=cStart + "searchProvider=".length;
-              cEnd=document.cookie.indexOf(";", cStart);
-              if (cEnd==-1) {
-                cEnd=document.cookie.length;
-              }
-              provider = unescape(document.cookie.substring(cStart,cEnd));
-              document.forms['searchform'].elements['searchProvider'].value = provider;
-            }
-          }
-        </script>
-      </form>
-      <div class="clear">
-        <hr/>
-      </div>
-    </div>
-    <div id="leftColumn">
-      <div id="navcolumn">
-                 <h5>Apache Tika</h5>
-            <ul>
-              
-    <li class="none">
-                    <a href="../index.html">Introduction</a>
-          </li>
-              
-    <li class="none">
-                    <a href="../download.html">Download</a>
-          </li>
-              
-    <li class="none">
-                    <a href="../mail-lists.html">Mailing Lists</a>
-          </li>
-              
-    <li class="none">
-                    <a href="http://wiki.apache.org/tika/" class="externalLink">Tika Wiki</a>
-          </li>
-              
-    <li class="none">
-                    <a href="https://issues.apache.org/jira/browse/TIKA" class="externalLink">Issue Tracker</a>
-          </li>
-          </ul>
-              <h5>Documentation</h5>
-            <ul>
-              
-          
-                    
-                  
-                  
-                  
-                  
-                  
-              
-        <li class="expanded">
-                    <a href="../0.7/index.html">Tika 0.7</a>
-                  <ul>
-                  
-    <li class="none">
-                    <a href="../0.7/gettingstarted.html">Getting Started</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/formats.html">Supported Formats</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser.html">Parser API</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser_guide.html">Parser 5min Quick Start Guide</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/detection.html">Content and Language Detection</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/api/">API Documentation</a>
-          </li>
-              </ul>
-        </li>
-              
-                
-                          
-                  
-                  
-                  
-              
-            <li class="expanded">
-                    <a href="../0.6/index.html">Tika 0.6</a>
-                  <ul>
-                  
-    <li class="none">
-              <strong>Getting Started</strong>
-        </li>
-                  
-    <li class="none">
-                    <a href="../0.6/formats.html">Supported Formats</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.6/parser.html">Parser API</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.6/api/">API Documentation</a>
-          </li>
-              </ul>
-        </li>
-              
-                
-                    
-                  
-                  
-                  
-              
-        <li class="collapsed">
-                    <a href="../0.5/index.html">Tika 0.5</a>
-                </li>
-          </ul>
             </div>
-      <div id="bookpromo">  
-        <a href="http://manning.com/mattmann/" title="Tika in Action"
-           ><img src="../mattmann_cover150.jpg"
-                 border="0" width="150" height="186"/></a>
-      </div>
-    </div>
-    <div id="bodyColumn">
-      <div id="contentBox">
+      <div id="content">
         <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Getting Started with Apache Tika<a name="Getting_Started_with_Apache_Tika"></a></h2><p>This d
 ocument describes how to build Apache Tika from sources and how to start using Tika in an application.</p></div><div class="section"><h2>Getting and building the sources<a name="Getting_and_building_the_sources"></a></h2><p>To build Tika from sources you first need to either <a href="../download.html">download</a> a source release or <a href="../source-repository.html">checkout</a> the latest sources from version control.</p><p>Once you have the sources, you can build them using the <a class="externalLink" href="http://maven.apache.org/">Maven 2</a> build system. Executing the following command in the base directory will build the sources and install the resulting artifacts in your local Maven repository.</p><div><pre>mvn install</pre></div><p>See the Maven documentation for more information about the available build options.</p><p>Note that you need Java 5 or higher to build Tika.</p></div><div class="section"><h2>Build artifacts<a name="Build_artifacts"></a></h2><p>The Tik
 a 0.6 build consists of a number of components and produces the following main binaries:</p><dl><dt>tika-core/target/tika-core-0.6.jar</dt><dd> Tika core library. Contains the core interfaces and classes of Tika, but none of the parser implementations. Depends only on Java 5.</dd><dt>tika-parsers/target/tika-parsers-0.6.jar</dt><dd> Tika parsers. Collection of classes that implement the Tika Parser interface based on various external parser libraries.</dd><dt>tika-app/target/tika-app-0.6.jar</dt><dd> Tika application. Combines the above libraries and all the external parser libraries into a single runnable jar with a GUI and a command line interface.</dd><dt>tika-bundle/target/tika-bundle-0.6.jar</dt><dd> Tika bundle. An OSGi bundle that includes everything you need to use all Tika functionality in an OSGi environment.</dd></dl></div><div class="section"><h2>Using Tika as a Maven dependency<a name="Using_Tika_as_a_Maven_dependency"></a></h2><p>The core library, tika-core, co
 ntains the key interfaces and classes of Tika and can be used by itself if you don't need the full set of parsers from the tika-parsers component. The tika-core dependency looks like this:</p><div><pre>  &lt;dependency&gt;
     &lt;groupId&gt;org.apache.tika&lt;/groupId&gt;
     &lt;artifactId&gt;tika-core&lt;/artifactId&gt;
@@ -310,22 +165,122 @@ curl http://.../document.doc \
   | java -jar tika-app-0.6.jar --text \
   | grep -q keyword</pre></div></div>
       </div>
-    </div>
-    <div class="clear">
-      <hr/>
-    </div>
-    <div id="footer">
-      <p>
-        Copyright 2010
-        <a href="http://www.apache.org/">The Apache Software Foundation</a>.
-        Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
-        Search powered by <a href="http://www.lucidimagination.com">Lucid Imagination</a> & <a href="http://sematext.com">Sematext</a>.
-        <br/>
-        Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
-        Tika project logo are trademarks of The Apache Software Foundation.
-      </p>
-      <div class="clear">
-        <hr/>
+      <div id="sidebar">
+        <div id="navigation">
+                    <h5>Apache Tika</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Introduction</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mail-lists.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://wiki.apache.org/tika/" class="externalLink">Tika Wiki</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/TIKA" class="externalLink">Issue Tracker</a>
+          </li>
+          </ul>
+              <h5>Documentation</h5>
+            <ul>
+              
+                
+                    
+                  
+                  
+                  
+                  
+                  
+              
+        <li class="collapsed">
+                    <a href="../0.7/index.html">Apache Tika 0.7</a>
+                </li>
+              
+                
+                          
+                  
+                  
+                  
+              
+            <li class="expanded">
+                    <a href="../0.6/index.html">Apache Tika 0.6</a>
+                  <ul>
+                  
+    <li class="none">
+              <strong>Getting Started</strong>
+        </li>
+                  
+    <li class="none">
+                    <a href="../0.6/formats.html">Supported Formats</a>
+          </li>
+                  
+    <li class="none">
+                    <a href="../0.6/parser.html">Parser API</a>
+          </li>
+                  
+    <li class="none">
+                    <a href="../0.6/api/">API Documentation</a>
+          </li>
+              </ul>
+        </li>
+              
+                
+                    
+                  
+                  
+                  
+              
+        <li class="collapsed">
+                    <a href="../0.5/index.html">Apache Tika 0.5</a>
+                </li>
+          </ul>
+      
+          <div id="search">
+            <h5>Search with Apache Solr</h5>
+            <form action="http://search.lucidimagination.com/p:tika"
+                  method="get" id="searchform">
+              <input type="text" id="query" name="q"/>
+              <select name="searchProvider" id="searchProvider">
+                <option value="any">provider</option>
+                <option value="lucid">Lucid Find</option>
+                <option value="sl">Search-Lucene</option>
+              </select>
+              <input type="submit" id="submit" value="Search" name="Search"
+                     onclick="selectProvider(this.form)"/>
+            </form>
+          </div>
+
+          <div id="bookpromo">
+            <h5>Books about Tika</h5>
+            <p>
+              <a href="http://manning.com/mattmann/" title="Tika in Action"
+                ><img src="../mattmann_cover150.jpg"
+                      width="150" height="186"/></a>
+            </p>
+          </div>
+        </div>
+      </div>
+      <div id="footer">
+        <p>
+          Copyright &#169; 2010
+          <a href="http://www.apache.org/">The Apache Software Foundation</a>.
+          Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
+          Search powered by
+          <a href="http://www.lucidimagination.com">Lucid Imagination</a>
+          and <a href="http://sematext.com">Sematext</a>.
+          <br/>
+          Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
+          Tika project logo are trademarks of The Apache Software Foundation.
+        </p>
       </div>
     </div>
   </body>

Modified: tika/site/publish/0.6/index.html
URL: http://svn.apache.org/viewvc/tika/site/publish/0.6/index.html?rev=1029500&r1=1029499&r2=1029500&view=diff
==============================================================================
--- tika/site/publish/0.6/index.html (original)
+++ tika/site/publish/0.6/index.html Sun Oct 31 21:49:59 2010
@@ -36,92 +36,62 @@
     </style>
     <link rel="icon" type="image/png" href="../tikaNoText16.png" />
     <script type="text/javascript">
-      function getBlank(form, stdValue) {
-        if (form.value == stdValue) {
-          form.value = '';
+      function selectProvider(form) {
+        provider = form.elements['searchProvider'].value;
+        if (provider == "any") {
+          if (Math.random() > 0.5) {
+            provider = "lucid";
+          } else {
+            provider = "sl";
+          }
+        }
+        if (provider == "lucid") {
+          form.action = "http://search.lucidimagination.com/p:tika";
+        } else if (provider == "sl") {
+          form.action = "http://search-lucene.com/tika";
         }
-        return true;
+        days = 90;
+        date = new Date();
+        date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
+        expires = "; expires=" + date.toGMTString();
+        document.cookie = "searchProvider=" + provider + expires + "; path=/";
       }
-      function getPrompt(form, stdValue) {
-        if (form.value == '') {
-          form.value = stdValue;
+      function initProvider() {
+        if (document.cookie.length>0) {
+          cStart=document.cookie.indexOf("searchProvider=");
+          if (cStart!=-1) {
+            cStart=cStart + "searchProvider=".length;
+            cEnd=document.cookie.indexOf(";", cStart);
+            if (cEnd==-1) {
+              cEnd=document.cookie.length;
+            }
+            provider = unescape(document.cookie.substring(cStart,cEnd));
+            document.forms['searchform'].elements['searchProvider'].value = provider;
+          }
         }
-        return true;
       }
     </script>
   </head>
-  <body class="composite">
-    <div id="banner">
-                  <a href="" id="bannerLeft"  title="Apache Tika"  >
+  <body onLoad="initProvider();">
+    <div id="body">
+      <div id="banner">
+                    <a href="" id="bannerLeft"  title="Apache Tika"  >
     
                                             <img src="../tika.png" alt="Apache Tika" />
     
             </a>
-                        <a href="www.apache.org" id="bannerRight"  title="Apache"  >
+                          <a href="http://www.apache.org/" id="bannerRight"  title="The Apache Software Foundation"  >
     
-                                    <img src="http://www.apache.org/images/feather-small.gif" alt="Apache" />
+                                            <img src="../asf-logo.gif" alt="The Apache Software Foundation" />
     
             </a>
-            <div class="clear">
-        <hr/>
-      </div>
-    </div>
-    <div id="search">
-      <script type="text/javascript">
-        function selectProvider(form) {
-          provider = form.elements['searchProvider'].value;
-          if (provider == "any") {
-            if (Math.random() > 0.5) {
-              provider = "lucid";
-            } else {
-              provider = "sl";
-            }
-          }
-
-          if (provider == "lucid") {
-            form.action = "http://search.lucidimagination.com/p:tika";
-          } else if (provider == "sl") {
-            form.action = "http://search-lucene.com/tika";
-          }
-
-          days = 90;
-          date = new Date();
-          date.setTime(date.getTime() + (days * 24 * 60 * 60 * 1000));
-          expires = "; expires=" + date.toGMTString();
-          document.cookie = "searchProvider=" + provider + expires + "; path=/";
-        }
-      </script>
-      <form action="http://search.lucidimagination.com/p:tika" method="get" id="searchform">
-        <input type="text" id="query" name="q" size="30" onFocus="getBlank (this, 'Search with Apache Solr');" value="Search with Apache Solr"></input>
-        <input type="submit" value="Search" name="Search" onclick="selectProvider(this.form)"/>
-        @
-        <select name="searchProvider" id="searchProvider">
-          <option value="any">select provider</option>
-          <option value="lucid">Lucid Find</option>
-          <option value="sl">Search-Lucene</option>
-        </select>
-        <script type="text/javascript">
-          if (document.cookie.length>0) {
-            cStart=document.cookie.indexOf("searchProvider=");
-            if (cStart!=-1) {
-              cStart=cStart + "searchProvider=".length;
-              cEnd=document.cookie.indexOf(";", cStart);
-              if (cEnd==-1) {
-                cEnd=document.cookie.length;
-              }
-              provider = unescape(document.cookie.substring(cStart,cEnd));
-              document.forms['searchform'].elements['searchProvider'].value = provider;
-            }
-          }
-        </script>
-      </form>
-      <div class="clear">
-        <hr/>
+            </div>
+      <div id="content">
+        <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Apache Tika 0.6<a name="Apache_Tika_0.6"></a></h2><p>The most notable changes in Tika 0.6 ove
 r the previous release are:</p><ul><li>Mime-type detection for HTML (and all types) has been improved, allowing malformed HTML files and those HTML files that require a bit more observed content before the type is properly detected, are now correctly identified by the AutoDetectParser. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-327">TIKA-327</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-357">TIKA-357</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-366">TIKA-366</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-367">TIKA-367</a>)</li><li>Tika now has an additional OSGi bundle packaging that includes all the required parser libraries. This bundle package makes it easy to use all Tika features in an OSGi environment. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-340">TIKA-340</a>, <a class="externalLink" href="https://issues.apache
 .org/jira/browse/TIKA-342">TIKA-342</a>)</li><li>The Apache POI dependency used for parsing Microsoft Office file formats has been upgraded to version 3.6. The most visible improvement in this version is the notably reduced ooxml jar file size. The tika-app jar size is now down to 15MB from the 25MB in Tika 0.5. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-353">TIKA-353</a>)</li><li>Handling of character encoding information in input metadata and HTML &lt;meta&gt; tags has been improved. When no applicable encoding information is available, the encoding is detected by looking at the input data. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-332">TIKA-332</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-334">TIKA-334</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-335">TIKA-335</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-341">TIK
 A-341</a>) </li><li>Some document types like Excel spreadsheets contain content like numbers or formulas whose exact text format depends on the current locale. So far Tika has used the platform default locale in such cases, but clients can now explicitly specify the locale by passing a Locale instance in the parse context. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-125">TIKA-125</a>)</li><li>The default text output encoding of the tika-app jar is now UTF-8 when running on Mac OS X. This is because the default encoding used by Java is not compatible with the console application in Mac OS X. On all other platforms the text output from tika-app still uses the platform default encoding. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-324">TIKA-324</a>)</li><li>A flash video (video/x-flv) parser has been added. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-328">TIKA-328</a>)</li><li>The handling 
 of Number and Date cell formatting within the Microsoft Excel documents has been added. This include currencies, percentages and scientific formats. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-103">TIKA-103</a>)</li></ul><p>The following people have contributed to Tika 0.6 by submitting or commenting on the issues resolved in this release:</p><ul><li>Andrzej Bialecki</li><li>Bertrand Delacretaz</li><li>Chris A. Mattmann</li><li>Dave Meikle</li><li>Erik Hetzner</li><li>Felix Meschberger</li><li>Jukka Zitting</li><li>Julien Nioche</li><li>Ken Krugler </li><li>Luke Nezda</li><li>Maxim Valyanskiy</li><li>Niall Pemberton</li><li>Peter Wolanin </li><li>Piotr B.</li><li>Sami Siren</li><li>Yuan-Fang Li</li></ul><p>See <a class="externalLink" href="http://tinyurl.com/yc3dk67">http://tinyurl.com/yc3dk67</a> for more details on these contributions.</p></div>
       </div>
-    </div>
-    <div id="leftColumn">
-      <div id="navcolumn">
-                 <h5>Apache Tika</h5>
+      <div id="sidebar">
+        <div id="navigation">
+                    <h5>Apache Tika</h5>
             <ul>
               
     <li class="none">
@@ -147,7 +117,7 @@
               <h5>Documentation</h5>
             <ul>
               
-          
+                
                     
                   
                   
@@ -155,35 +125,9 @@
                   
                   
               
-        <li class="expanded">
-                    <a href="../0.7/index.html">Tika 0.7</a>
-                  <ul>
-                  
-    <li class="none">
-                    <a href="../0.7/gettingstarted.html">Getting Started</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/formats.html">Supported Formats</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser.html">Parser API</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/parser_guide.html">Parser 5min Quick Start Guide</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/detection.html">Content and Language Detection</a>
-          </li>
-                  
-    <li class="none">
-                    <a href="../0.7/api/">API Documentation</a>
-          </li>
-              </ul>
-        </li>
+        <li class="collapsed">
+                    <a href="../0.7/index.html">Apache Tika 0.7</a>
+                </li>
               
                 
                     
@@ -192,7 +136,7 @@
                   
               
             <li class="expanded">
-              <strong>Tika 0.6</strong>
+              <strong>Apache Tika 0.6</strong>
                 <ul>
                   
     <li class="none">
@@ -220,36 +164,47 @@
                   
               
         <li class="collapsed">
-                    <a href="../0.5/index.html">Tika 0.5</a>
+                    <a href="../0.5/index.html">Apache Tika 0.5</a>
                 </li>
           </ul>
-            </div>
-      <div id="bookpromo">  
-        <a href="http://manning.com/mattmann/" title="Tika in Action"
-           ><img src="../mattmann_cover150.jpg"
-                 border="0" width="150" height="186"/></a>
-      </div>
-    </div>
-    <div id="bodyColumn">
-      <div id="contentBox">
-        <!-- Licensed to the Apache Software Foundation (ASF) under one or more --><!-- contributor license agreements.  See the NOTICE file distributed with --><!-- this work for additional information regarding copyright ownership. --><!-- The ASF licenses this file to You under the Apache License, Version 2.0 --><!-- (the "License"); you may not use this file except in compliance with --><!-- the License.  You may obtain a copy of the License at --><!--  --><!-- http://www.apache.org/licenses/LICENSE-2.0 --><!--  --><!-- Unless required by applicable law or agreed to in writing, software --><!-- distributed under the License is distributed on an "AS IS" BASIS, --><!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. --><!-- See the License for the specific language governing permissions and --><!-- limitations under the License. --><div class="section"><h2>Apache Tika 0.6<a name="Apache_Tika_0.6"></a></h2><p>The most notable changes in Tika 0.6 ove
 r the previous release are:</p><ul><li>Mime-type detection for HTML (and all types) has been improved, allowing malformed HTML files and those HTML files that require a bit more observed content before the type is properly detected, are now correctly identified by the AutoDetectParser. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-327">TIKA-327</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-357">TIKA-357</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-366">TIKA-366</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-367">TIKA-367</a>)</li><li>Tika now has an additional OSGi bundle packaging that includes all the required parser libraries. This bundle package makes it easy to use all Tika features in an OSGi environment. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-340">TIKA-340</a>, <a class="externalLink" href="https://issues.apache
 .org/jira/browse/TIKA-342">TIKA-342</a>)</li><li>The Apache POI dependency used for parsing Microsoft Office file formats has been upgraded to version 3.6. The most visible improvement in this version is the notably reduced ooxml jar file size. The tika-app jar size is now down to 15MB from the 25MB in Tika 0.5. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-353">TIKA-353</a>)</li><li>Handling of character encoding information in input metadata and HTML &lt;meta&gt; tags has been improved. When no applicable encoding information is available, the encoding is detected by looking at the input data. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-332">TIKA-332</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-334">TIKA-334</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-335">TIKA-335</a>, <a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-341">TIK
 A-341</a>) </li><li>Some document types like Excel spreadsheets contain content like numbers or formulas whose exact text format depends on the current locale. So far Tika has used the platform default locale in such cases, but clients can now explicitly specify the locale by passing a Locale instance in the parse context. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-125">TIKA-125</a>)</li><li>The default text output encoding of the tika-app jar is now UTF-8 when running on Mac OS X. This is because the default encoding used by Java is not compatible with the console application in Mac OS X. On all other platforms the text output from tika-app still uses the platform default encoding. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-324">TIKA-324</a>)</li><li>A flash video (video/x-flv) parser has been added. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-328">TIKA-328</a>)</li><li>The handling 
 of Number and Date cell formatting within the Microsoft Excel documents has been added. This include currencies, percentages and scientific formats. (<a class="externalLink" href="https://issues.apache.org/jira/browse/TIKA-103">TIKA-103</a>)</li></ul><p>The following people have contributed to Tika 0.6 by submitting or commenting on the issues resolved in this release:</p><ul><li>Andrzej Bialecki</li><li>Bertrand Delacretaz</li><li>Chris A. Mattmann</li><li>Dave Meikle</li><li>Erik Hetzner</li><li>Felix Meschberger</li><li>Jukka Zitting</li><li>Julien Nioche</li><li>Ken Krugler </li><li>Luke Nezda</li><li>Maxim Valyanskiy</li><li>Niall Pemberton</li><li>Peter Wolanin </li><li>Piotr B.</li><li>Sami Siren</li><li>Yuan-Fang Li</li></ul><p>See <a class="externalLink" href="http://tinyurl.com/yc3dk67">http://tinyurl.com/yc3dk67</a> for more details on these contributions.</p></div>
+      
+          <div id="search">
+            <h5>Search with Apache Solr</h5>
+            <form action="http://search.lucidimagination.com/p:tika"
+                  method="get" id="searchform">
+              <input type="text" id="query" name="q"/>
+              <select name="searchProvider" id="searchProvider">
+                <option value="any">provider</option>
+                <option value="lucid">Lucid Find</option>
+                <option value="sl">Search-Lucene</option>
+              </select>
+              <input type="submit" id="submit" value="Search" name="Search"
+                     onclick="selectProvider(this.form)"/>
+            </form>
+          </div>
+
+          <div id="bookpromo">
+            <h5>Books about Tika</h5>
+            <p>
+              <a href="http://manning.com/mattmann/" title="Tika in Action"
+                ><img src="../mattmann_cover150.jpg"
+                      width="150" height="186"/></a>
+            </p>
+          </div>
+        </div>
       </div>
-    </div>
-    <div class="clear">
-      <hr/>
-    </div>
-    <div id="footer">
-      <p>
-        Copyright 2010
-        <a href="http://www.apache.org/">The Apache Software Foundation</a>.
-        Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
-        Search powered by <a href="http://www.lucidimagination.com">Lucid Imagination</a> & <a href="http://sematext.com">Sematext</a>.
-        <br/>
-        Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
-        Tika project logo are trademarks of The Apache Software Foundation.
-      </p>
-      <div class="clear">
-        <hr/>
+      <div id="footer">
+        <p>
+          Copyright &#169; 2010
+          <a href="http://www.apache.org/">The Apache Software Foundation</a>.
+          Site powered by <a href="http://maven.apache.org/">Apache Maven</a>. 
+          Search powered by
+          <a href="http://www.lucidimagination.com">Lucid Imagination</a>
+          and <a href="http://sematext.com">Sematext</a>.
+          <br/>
+          Apache Tika, Tika, Apache, the Apache feather logo, and the Apache
+          Tika project logo are trademarks of The Apache Software Foundation.
+        </p>
       </div>
     </div>
   </body>