You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2007/11/18 23:21:13 UTC

svn commit: r596146 [30/36] - in /incubator/tika/site: ./ apidocs/ apidocs/org/ apidocs/org/apache/ apidocs/org/apache/tika/ apidocs/org/apache/tika/config/ apidocs/org/apache/tika/config/class-use/ apidocs/org/apache/tika/exception/ apidocs/org/apache...

Added: incubator/tika/site/xref/org/apache/tika/mime/Patterns.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/mime/Patterns.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/mime/Patterns.html (added)
+++ incubator/tika/site/xref/org/apache/tika/mime/Patterns.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,191 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>Patterns xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/mime/Patterns.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a>   <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_javadoccomment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_javadoccomment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_javadoccomment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.mime;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <em class="jxr_comment">// JDK imports</em>
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.util.Comparator;
+<a name="21" href="#21">21</a>  <strong class="jxr_keyword">import</strong> java.util.HashMap;
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> java.util.Map;
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> java.util.SortedMap;
+<a name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> java.util.TreeMap;
+<a name="25" href="#25">25</a>  
+<a name="26" href="#26">26</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="27" href="#27">27</a>  <em class="jxr_javadoccomment"> * Defines a MimeType pattern.</em>
+<a name="28" href="#28">28</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="29" href="#29">29</a>  <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/mime/Patterns.html">Patterns</a> {
+<a name="30" href="#30">30</a>  
+<a name="31" href="#31">31</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="32" href="#32">32</a>  <em class="jxr_javadoccomment">     * Index of exact name patterns.</em>
+<a name="33" href="#33">33</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="34" href="#34">34</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> Map&lt;String, MimeType&gt; names = <strong class="jxr_keyword">new</strong> HashMap&lt;String, MimeType&gt;();
+<a name="35" href="#35">35</a>  
+<a name="36" href="#36">36</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="37" href="#37">37</a>  <em class="jxr_javadoccomment">     * Index of extension patterns of the form "*extension".</em>
+<a name="38" href="#38">38</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="39" href="#39">39</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> Map&lt;String, MimeType&gt; extensions =
+<a name="40" href="#40">40</a>          <strong class="jxr_keyword">new</strong> HashMap&lt;String, MimeType&gt;();
+<a name="41" href="#41">41</a>  
+<a name="42" href="#42">42</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">int</strong> minExtensionLength = Integer.MAX_VALUE;
+<a name="43" href="#43">43</a>  
+<a name="44" href="#44">44</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">int</strong> maxExtensionLength = 0;
+<a name="45" href="#45">45</a>  
+<a name="46" href="#46">46</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="47" href="#47">47</a>  <em class="jxr_javadoccomment">     * Index of generic glob patterns, sorted by length.</em>
+<a name="48" href="#48">48</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="49" href="#49">49</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> SortedMap&lt;String, MimeType&gt; globs =
+<a name="50" href="#50">50</a>          <strong class="jxr_keyword">new</strong> TreeMap&lt;String, MimeType&gt;(<strong class="jxr_keyword">new</strong> Comparator&lt;String&gt;() {
+<a name="51" href="#51">51</a>              <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">int</strong> compare(String a, String b) {
+<a name="52" href="#52">52</a>                  <strong class="jxr_keyword">int</strong> diff = b.length() - a.length();
+<a name="53" href="#53">53</a>                  <strong class="jxr_keyword">if</strong> (diff == 0) {
+<a name="54" href="#54">54</a>                      diff = a.compareTo(b);
+<a name="55" href="#55">55</a>                  }
+<a name="56" href="#56">56</a>                  <strong class="jxr_keyword">return</strong> diff;
+<a name="57" href="#57">57</a>              }
+<a name="58" href="#58">58</a>          });
+<a name="59" href="#59">59</a>  
+<a name="60" href="#60">60</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> add(String pattern, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type) <strong class="jxr_keyword">throws</strong> MimeTypeException {
+<a name="61" href="#61">61</a>          assert pattern != <strong class="jxr_keyword">null</strong> &amp;&amp; type != <strong class="jxr_keyword">null</strong>;
+<a name="62" href="#62">62</a>  
+<a name="63" href="#63">63</a>          <strong class="jxr_keyword">if</strong> (pattern.indexOf('*') == -1
+<a name="64" href="#64">64</a>                  &amp;&amp; pattern.indexOf('?') == -1
+<a name="65" href="#65">65</a>                  &amp;&amp; pattern.indexOf('[') == -1) {
+<a name="66" href="#66">66</a>              addName(pattern, type);
+<a name="67" href="#67">67</a>          } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (pattern.startsWith(<span class="jxr_string">"*"</span>)
+<a name="68" href="#68">68</a>                  &amp;&amp; pattern.indexOf('*', 1) == -1
+<a name="69" href="#69">69</a>                  &amp;&amp; pattern.indexOf('?') == -1
+<a name="70" href="#70">70</a>                  &amp;&amp; pattern.indexOf('[') == -1) {
+<a name="71" href="#71">71</a>              addExtension(pattern.substring(1), type);
+<a name="72" href="#72">72</a>          } <strong class="jxr_keyword">else</strong> {
+<a name="73" href="#73">73</a>              addGlob(compile(pattern), type);
+<a name="74" href="#74">74</a>          }
+<a name="75" href="#75">75</a>      }
+<a name="76" href="#76">76</a>  
+<a name="77" href="#77">77</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">void</strong> addName(String name, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type) <strong class="jxr_keyword">throws</strong> MimeTypeException {
+<a name="78" href="#78">78</a>          <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> previous = names.get(name);
+<a name="79" href="#79">79</a>          <strong class="jxr_keyword">if</strong> (previous == <strong class="jxr_keyword">null</strong> || previous.isDescendantOf(type)) {
+<a name="80" href="#80">80</a>              names.put(name, type);
+<a name="81" href="#81">81</a>          } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (previous == type || type.isDescendantOf(previous)) {
+<a name="82" href="#82">82</a>              <em class="jxr_comment">// do nothing</em>
+<a name="83" href="#83">83</a>          } <strong class="jxr_keyword">else</strong> {
+<a name="84" href="#84">84</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a>(<span class="jxr_string">"Conflicting name pattern: "</span> + name);
+<a name="85" href="#85">85</a>          }
+<a name="86" href="#86">86</a>      }
+<a name="87" href="#87">87</a>  
+<a name="88" href="#88">88</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">void</strong> addExtension(String extension, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type)
+<a name="89" href="#89">89</a>              <strong class="jxr_keyword">throws</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a> {
+<a name="90" href="#90">90</a>          <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> previous = extensions.get(extension);
+<a name="91" href="#91">91</a>          <strong class="jxr_keyword">if</strong> (previous == <strong class="jxr_keyword">null</strong> || previous.isDescendantOf(type)) {
+<a name="92" href="#92">92</a>              extensions.put(extension, type);
+<a name="93" href="#93">93</a>              <strong class="jxr_keyword">int</strong> length = extension.length();
+<a name="94" href="#94">94</a>              minExtensionLength = Math.min(minExtensionLength, length);
+<a name="95" href="#95">95</a>              maxExtensionLength = Math.max(maxExtensionLength, length);
+<a name="96" href="#96">96</a>          } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (previous == type || type.isDescendantOf(previous)) {
+<a name="97" href="#97">97</a>              <em class="jxr_comment">// do nothing</em>
+<a name="98" href="#98">98</a>          } <strong class="jxr_keyword">else</strong> {
+<a name="99" href="#99">99</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a>(
+<a name="100" href="#100">100</a>                     <span class="jxr_string">"Conflicting extension pattern: "</span> + extension);
+<a name="101" href="#101">101</a>         }
+<a name="102" href="#102">102</a>     }
+<a name="103" href="#103">103</a> 
+<a name="104" href="#104">104</a>     <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">void</strong> addGlob(String glob, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type)
+<a name="105" href="#105">105</a>             <strong class="jxr_keyword">throws</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a> {
+<a name="106" href="#106">106</a>         <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> previous = globs.get(glob);
+<a name="107" href="#107">107</a>         <strong class="jxr_keyword">if</strong> (previous == <strong class="jxr_keyword">null</strong> || previous.isDescendantOf(type)) {
+<a name="108" href="#108">108</a>             globs.put(glob, type); <em class="jxr_comment">// store in the glob index (the one looked up above and scanned by matches()), not in extensions</em>
+<a name="109" href="#109">109</a>         } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (previous == type || type.isDescendantOf(previous)) {
+<a name="110" href="#110">110</a>             <em class="jxr_comment">// do nothing: keep the existing mapping</em>
+<a name="111" href="#111">111</a>         } <strong class="jxr_keyword">else</strong> {
+<a name="112" href="#112">112</a>             <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a>(<span class="jxr_string">"Conflicting glob pattern: "</span> + glob);
+<a name="113" href="#113">113</a>         }
+<a name="114" href="#114">114</a>     }
+<a name="115" href="#115">115</a> 
+<a name="116" href="#116">116</a>     <em class="jxr_javadoccomment">/**</em>
+<a name="117" href="#117">117</a> <em class="jxr_javadoccomment">     * Find the MimeType corresponding to a resource name.</em>
+<a name="118" href="#118">118</a> <em class="jxr_javadoccomment">     * </em>
+<a name="119" href="#119">119</a> <em class="jxr_javadoccomment">     * It applies the recommendations detailed in FreeDesktop Shared MIME-info</em>
+<a name="120" href="#120">120</a> <em class="jxr_javadoccomment">     * Database for guessing MimeType from a resource name: It first tries a</em>
+<a name="121" href="#121">121</a> <em class="jxr_javadoccomment">     * case-sensitive match, then tries again with the resource name converted to</em>
+<a name="122" href="#122">122</a> <em class="jxr_javadoccomment">     * lower-case if that fails. If several patterns match then the longest</em>
+<a name="123" href="#123">123</a> <em class="jxr_javadoccomment">     * pattern is used. In particular, files with multiple extensions (such as</em>
+<a name="124" href="#124">124</a> <em class="jxr_javadoccomment">     * Data.tar.gz) match the longest sequence of extensions (eg '*.tar.gz' in</em>
+<a name="125" href="#125">125</a> <em class="jxr_javadoccomment">     * preference to '*.gz'). Literal patterns (eg, 'Makefile') are matched</em>
+<a name="126" href="#126">126</a> <em class="jxr_javadoccomment">     * before all others. Patterns beginning with `*.' and containing no other</em>
+<a name="127" href="#127">127</a> <em class="jxr_javadoccomment">     * special characters (`*?[') are matched before other wildcarded patterns</em>
+<a name="128" href="#128">128</a> <em class="jxr_javadoccomment">     * (since this covers the majority of the patterns).</em>
+<a name="129" href="#129">129</a> <em class="jxr_javadoccomment">     */</em>
+<a name="130" href="#130">130</a>     <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> matches(String name) {
+<a name="131" href="#131">131</a>         assert name != <strong class="jxr_keyword">null</strong>;
+<a name="132" href="#132">132</a> 
+<a name="133" href="#133">133</a>         <em class="jxr_comment">// First, try exact match of the provided resource name</em>
+<a name="134" href="#134">134</a>         <strong class="jxr_keyword">if</strong> (names.containsKey(name)) {
+<a name="135" href="#135">135</a>             <strong class="jxr_keyword">return</strong> names.get(name);
+<a name="136" href="#136">136</a>         }
+<a name="137" href="#137">137</a> 
+<a name="138" href="#138">138</a>         <em class="jxr_comment">// Then try "extension" (*.xxx) matching</em>
+<a name="139" href="#139">139</a>         <strong class="jxr_keyword">int</strong> maxLength = Math.min(maxExtensionLength, name.length());
+<a name="140" href="#140">140</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> n = maxLength; n &gt;= minExtensionLength; n--) {
+<a name="141" href="#141">141</a>             String extension = name.substring(name.length() - n);
+<a name="142" href="#142">142</a>             <strong class="jxr_keyword">if</strong> (extensions.containsKey(extension)) {
+<a name="143" href="#143">143</a>                 <strong class="jxr_keyword">return</strong> extensions.get(extension);
+<a name="144" href="#144">144</a>             }
+<a name="145" href="#145">145</a>         }
+<a name="146" href="#146">146</a> 
+<a name="147" href="#147">147</a>         <em class="jxr_comment">// And finally, try complex regexp matching</em>
+<a name="148" href="#148">148</a>         <strong class="jxr_keyword">for</strong> (Map.Entry&lt;String, MimeType&gt; entry : globs.entrySet()) {
+<a name="149" href="#149">149</a>             <strong class="jxr_keyword">if</strong> (name.matches(entry.getKey())) {
+<a name="150" href="#150">150</a>                 <strong class="jxr_keyword">return</strong> entry.getValue();
+<a name="151" href="#151">151</a>             }
+<a name="152" href="#152">152</a>         }
+<a name="153" href="#153">153</a> 
+<a name="154" href="#154">154</a>         <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">null</strong>;
+<a name="155" href="#155">155</a>     }
+<a name="156" href="#156">156</a> 
+<a name="157" href="#157">157</a>     <strong class="jxr_keyword">private</strong> String compile(String glob) {
+<a name="158" href="#158">158</a>         StringBuilder pattern = <strong class="jxr_keyword">new</strong> StringBuilder();
+<a name="159" href="#159">159</a>         pattern.append(<span class="jxr_string">"&#92;&#92;A"</span>);
+<a name="160" href="#160">160</a>         <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i &lt; glob.length(); i++) {
+<a name="161" href="#161">161</a>             <strong class="jxr_keyword">char</strong> ch = glob.charAt(i);
+<a name="162" href="#162">162</a>             <strong class="jxr_keyword">if</strong> (ch == '?') {
+<a name="163" href="#163">163</a>                 pattern.append('.');
+<a name="164" href="#164">164</a>             } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (ch == '*') {
+<a name="165" href="#165">165</a>                 pattern.append(<span class="jxr_string">".*"</span>);
+<a name="166" href="#166">166</a>             } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (<span class="jxr_string">"&#92;&#92;[]^.-$+(){}|"</span>.indexOf(ch) != -1) {
+<a name="167" href="#167">167</a>                 pattern.append('&#92;&#92;');
+<a name="168" href="#168">168</a>                 pattern.append(ch);
+<a name="169" href="#169">169</a>             } <strong class="jxr_keyword">else</strong> {
+<a name="170" href="#170">170</a>                 pattern.append(ch);
+<a name="171" href="#171">171</a>             }
+<a name="172" href="#172">172</a>         }
+<a name="173" href="#173">173</a>         pattern.append(<span class="jxr_string">"&#92;&#92;z"</span>);
+<a name="174" href="#174">174</a>         <strong class="jxr_keyword">return</strong> pattern.toString();
+<a name="175" href="#175">175</a>     }
+<a name="176" href="#176">176</a> 
+<a name="177" href="#177">177</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+

Added: incubator/tika/site/xref/org/apache/tika/mime/package-frame.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/mime/package-frame.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/mime/package-frame.html (added)
+++ incubator/tika/site/xref/org/apache/tika/mime/package-frame.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,72 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en">
+	<head>
+		<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+		<title>Apache Tika 0.1-SNAPSHOT Reference Package org.apache.tika.mime</title>
+		<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="style" />
+	</head>
+	<body>
+
+		<h3>
+        	<a href="package-summary.html" target="classFrame">org.apache.tika.mime</a>
+      	</h3>
+
+      	<h3>Classes</h3>
+
+      	<ul>
+      		          	<li>
+            	<a href="Operator.html" target="classFrame">And</a>
+          	</li>
+          	          	<li>
+            	<a href="Clause.html" target="classFrame">Clause</a>
+          	</li>
+          	          	<li>
+            	<a href="Clause.html" target="classFrame">False</a>
+          	</li>
+          	          	<li>
+            	<a href="HexCoDec.html" target="classFrame">HexCoDec</a>
+          	</li>
+          	          	<li>
+            	<a href="Magic.html" target="classFrame">Magic</a>
+          	</li>
+          	          	<li>
+            	<a href="MagicClause.html" target="classFrame">MagicClause</a>
+          	</li>
+          	          	<li>
+            	<a href="MagicMatch.html" target="classFrame">MagicMatch</a>
+          	</li>
+          	          	<li>
+            	<a href="MimeType.html" target="classFrame">MimeType</a>
+          	</li>
+          	          	<li>
+            	<a href="MimeTypeException.html" target="classFrame">MimeTypeException</a>
+          	</li>
+          	          	<li>
+            	<a href="MimeTypes.html" target="classFrame">MimeTypes</a>
+          	</li>
+          	          	<li>
+            	<a href="MimeTypesFactory.html" target="classFrame">MimeTypesFactory</a>
+          	</li>
+          	          	<li>
+            	<a href="MimeTypesReader.html" target="classFrame">MimeTypesReader</a>
+          	</li>
+          	          	<li>
+            	<a href="Operator.html" target="classFrame">Operator</a>
+          	</li>
+          	          	<li>
+            	<a href="Operator.html" target="classFrame">Or</a>
+          	</li>
+          	          	<li>
+            	<a href="Patterns.html" target="classFrame">Patterns</a>
+          	</li>
+          	          	<li>
+            	<a href="MimeType.html" target="classFrame">RootXML</a>
+          	</li>
+          	          	<li>
+            	<a href="Clause.html" target="classFrame">True</a>
+          	</li>
+          	      	</ul>
+
+	</body>
+</html>
\ No newline at end of file

Added: incubator/tika/site/xref/org/apache/tika/mime/package-summary.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/mime/package-summary.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/mime/package-summary.html (added)
+++ incubator/tika/site/xref/org/apache/tika/mime/package-summary.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,147 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en">
+	<head>
+		<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+		<title>Apache Tika 0.1-SNAPSHOT Reference Package org.apache.tika.mime</title>
+		<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="style" />
+	</head>
+	<body>
+		      	<div class="overview">
+        	<ul>
+          		<li>
+            		<a href="../../../../overview-summary.html">Overview</a>
+          		</li>
+          		<li class="selected">Package</li>
+        	</ul>
+      	</div>
+      	<div class="framenoframe">
+        	<ul>
+          		<li>
+            		<a href="../../../../index.html" target="_top">FRAMES</a>
+          		</li>
+          		<li>
+            		<a href="package-summary.html" target="_top">NO FRAMES</a>
+          		</li>
+        	</ul>
+      	</div>
+		
+		      	<h2>Package org.apache.tika.mime</h2>
+
+		<table class="summary">
+        	<thead>
+          		<tr>
+            		<th>Class Summary</th>
+          		</tr>
+        	</thead>
+        	<tbody>
+        		            	<tr>
+              		<td>
+                		<a href="Operator.html" target="classFrame">And</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Clause.html" target="classFrame">Clause</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Clause.html" target="classFrame">False</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="HexCoDec.html" target="classFrame">HexCoDec</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Magic.html" target="classFrame">Magic</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MagicClause.html" target="classFrame">MagicClause</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MagicMatch.html" target="classFrame">MagicMatch</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MimeType.html" target="classFrame">MimeType</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MimeTypeException.html" target="classFrame">MimeTypeException</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MimeTypes.html" target="classFrame">MimeTypes</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MimeTypesFactory.html" target="classFrame">MimeTypesFactory</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MimeTypesReader.html" target="classFrame">MimeTypesReader</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Operator.html" target="classFrame">Operator</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Operator.html" target="classFrame">Or</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Patterns.html" target="classFrame">Patterns</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="MimeType.html" target="classFrame">RootXML</a>
+              		</td>
+            	</tr>
+				            	<tr>
+              		<td>
+                		<a href="Clause.html" target="classFrame">True</a>
+              		</td>
+            	</tr>
+				        	</tbody>
+      	</table>
+		
+		      	<div class="overview">
+        	<ul>
+          		<li>
+            		<a href="../../../../overview-summary.html">Overview</a>
+          		</li>
+          		<li class="selected">Package</li>
+        	</ul>
+      	</div>
+      	<div class="framenoframe">
+        	<ul>
+          		<li>
+            		<a href="../../../../index.html" target="_top">FRAMES</a>
+          		</li>
+          		<li>
+            		<a href="package-summary.html" target="_top">NO FRAMES</a>
+          		</li>
+        	</ul>
+      	</div>
+				<hr />
+		Copyright &copy; 2007 The Apache Software Foundation. All Rights Reserved.
+	</body>
+</html>
\ No newline at end of file

Added: incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,188 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>AutoDetectParser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/AutoDetectParser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a>   <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_javadoccomment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_javadoccomment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_javadoccomment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.BufferedInputStream;
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.io.ByteArrayOutputStream;
+<a name="21" href="#21">21</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="23" href="#23">23</a>  
+<a name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.config.TikaConfig;
+<a name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="26" href="#26">26</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="27" href="#27">27</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.mime.MimeType;
+<a name="28" href="#28">28</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.mime.MimeTypeException;
+<a name="29" href="#29">29</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.mime.MimeTypes;
+<a name="30" href="#30">30</a>  <strong class="jxr_keyword">import</strong> org.jdom.JDOMException;
+<a name="31" href="#31">31</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="32" href="#32">32</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="33" href="#33">33</a>  
+<a name="34" href="#34">34</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/AutoDetectParser.html">AutoDetectParser</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="35" href="#35">35</a>  
+<a name="36" href="#36">36</a>      <strong class="jxr_keyword">private</strong> <a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> config;
+<a name="37" href="#37">37</a>  
+<a name="38" href="#38">38</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="39" href="#39">39</a>  <em class="jxr_javadoccomment">     * Creates an auto-detecting parser instance using the default Tika</em>
+<a name="40" href="#40">40</a>  <em class="jxr_javadoccomment">     * configuration.</em>
+<a name="41" href="#41">41</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="42" href="#42">42</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/AutoDetectParser.html">AutoDetectParser</a>() {
+<a name="43" href="#43">43</a>          <strong class="jxr_keyword">try</strong> {
+<a name="44" href="#44">44</a>              config = TikaConfig.getDefaultConfig();
+<a name="45" href="#45">45</a>          } <strong class="jxr_keyword">catch</strong> (IOException e) {
+<a name="46" href="#46">46</a>              <em class="jxr_comment">// FIXME: This should never happen</em>
+<a name="47" href="#47">47</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> RuntimeException(e);
+<a name="48" href="#48">48</a>          } <strong class="jxr_keyword">catch</strong> (JDOMException e) {
+<a name="49" href="#49">49</a>              <em class="jxr_comment">// FIXME: This should never happen</em>
+<a name="50" href="#50">50</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> RuntimeException(e);
+<a name="51" href="#51">51</a>          }
+<a name="52" href="#52">52</a>      }
+<a name="53" href="#53">53</a>  
+<a name="54" href="#54">54</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/AutoDetectParser.html">AutoDetectParser</a>(<a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> config) {
+<a name="55" href="#55">55</a>          <strong class="jxr_keyword">this</strong>.config = config;
+<a name="56" href="#56">56</a>      }
+<a name="57" href="#57">57</a>  
+<a name="58" href="#58">58</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> getConfig() {
+<a name="59" href="#59">59</a>          <strong class="jxr_keyword">return</strong> config;
+<a name="60" href="#60">60</a>      }
+<a name="61" href="#61">61</a>  
+<a name="62" href="#62">62</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> setConfig(<a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> config) {
+<a name="63" href="#63">63</a>          <strong class="jxr_keyword">this</strong>.config = config;
+<a name="64" href="#64">64</a>      }
+<a name="65" href="#65">65</a>  
+<a name="66" href="#66">66</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="67" href="#67">67</a>              InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="68" href="#68">68</a>              <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="69" href="#69">69</a>          <em class="jxr_comment">// We need buffering to enable MIME magic detection before parsing</em>
+<a name="70" href="#70">70</a>          <strong class="jxr_keyword">if</strong> (!stream.markSupported()) {
+<a name="71" href="#71">71</a>              stream = <strong class="jxr_keyword">new</strong> BufferedInputStream(stream);
+<a name="72" href="#72">72</a>          }
+<a name="73" href="#73">73</a>  
+<a name="74" href="#74">74</a>          <em class="jxr_comment">// Automatically detect the MIME type of the document </em>
+<a name="75" href="#75">75</a>          <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type = getMimeType(stream, metadata);
+<a name="76" href="#76">76</a>          metadata.set(Metadata.CONTENT_TYPE, type.getName());
+<a name="77" href="#77">77</a>  
+<a name="78" href="#78">78</a>          <em class="jxr_comment">// Get the parser configured for the detected MIME type</em>
+<a name="79" href="#79">79</a>          <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser = config.getParser(type.getName());
+<a name="80" href="#80">80</a>          <strong class="jxr_keyword">if</strong> (parser == <strong class="jxr_keyword">null</strong>) {
+<a name="81" href="#81">81</a>              parser = config.getParser(MimeTypes.DEFAULT);
+<a name="82" href="#82">82</a>          }
+<a name="83" href="#83">83</a>          <strong class="jxr_keyword">if</strong> (parser == <strong class="jxr_keyword">null</strong>) {
+<a name="84" href="#84">84</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a>(<span class="jxr_string">"No parsers available: "</span> + type.getName());
+<a name="85" href="#85">85</a>          }
+<a name="86" href="#86">86</a>  
+<a name="87" href="#87">87</a>          <em class="jxr_comment">// Parse the document</em>
+<a name="88" href="#88">88</a>          parser.parse(stream, handler, metadata);
+<a name="89" href="#89">89</a>      }
+<a name="90" href="#90">90</a>  
+<a name="91" href="#91">91</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="92" href="#92">92</a>  <em class="jxr_javadoccomment">     * Automatically detects the MIME type of a document based on magic</em>
+<a name="93" href="#93">93</a>  <em class="jxr_javadoccomment">     * markers in the stream prefix and any given metadata hints.</em>
+<a name="94" href="#94">94</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a name="95" href="#95">95</a>  <em class="jxr_javadoccomment">     * The given stream is expected to support marks, so that this method</em>
+<a name="96" href="#96">96</a>  <em class="jxr_javadoccomment">     * can reset the stream to the position it was in before this method</em>
+<a name="97" href="#97">97</a>  <em class="jxr_javadoccomment">     * was called.</em>
+<a name="98" href="#98">98</a>  <em class="jxr_javadoccomment">     *</em>
+<a name="99" href="#99">99</a>  <em class="jxr_javadoccomment">     * @param stream document stream</em>
+<a name="100" href="#100">100</a> <em class="jxr_javadoccomment">     * @param metadata metadata hints</em>
+<a name="101" href="#101">101</a> <em class="jxr_javadoccomment">     * @return MIME type of the document</em>
+<a name="102" href="#102">102</a> <em class="jxr_javadoccomment">     * @throws IOException if the document stream could not be read</em>
+<a name="103" href="#103">103</a> <em class="jxr_javadoccomment">     */</em>
+<a name="104" href="#104">104</a>     <strong class="jxr_keyword">private</strong> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> getMimeType(InputStream stream, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="105" href="#105">105</a>             <strong class="jxr_keyword">throws</strong> IOException {
+<a name="106" href="#106">106</a>         <a href="../../../../org/apache/tika/mime/MimeTypes.html">MimeTypes</a> types = config.getMimeRepository();
+<a name="107" href="#107">107</a> 
+<a name="108" href="#108">108</a>         <em class="jxr_comment">// Get type based on magic prefix</em>
+<a name="109" href="#109">109</a>         stream.mark(types.getMinLength());
+<a name="110" href="#110">110</a>         <strong class="jxr_keyword">try</strong> {
+<a name="111" href="#111">111</a>             byte[] prefix = getPrefix(stream, types.getMinLength());
+<a name="112" href="#112">112</a>             <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type = types.getMimeType(prefix);
+<a name="113" href="#113">113</a>             <strong class="jxr_keyword">if</strong> (type != <strong class="jxr_keyword">null</strong>) {
+<a name="114" href="#114">114</a>                 <strong class="jxr_keyword">return</strong> type;
+<a name="115" href="#115">115</a>             }
+<a name="116" href="#116">116</a>         } <strong class="jxr_keyword">finally</strong> {
+<a name="117" href="#117">117</a>             stream.reset();
+<a name="118" href="#118">118</a>         }
+<a name="119" href="#119">119</a> 
+<a name="120" href="#120">120</a>         <em class="jxr_comment">// Get type based on resourceName hint (if available)</em>
+<a name="121" href="#121">121</a>         String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
+<a name="122" href="#122">122</a>         <strong class="jxr_keyword">if</strong> (resourceName != <strong class="jxr_keyword">null</strong>) {
+<a name="123" href="#123">123</a>             <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type = types.getMimeType(resourceName);
+<a name="124" href="#124">124</a>             <strong class="jxr_keyword">if</strong> (type != <strong class="jxr_keyword">null</strong>) {
+<a name="125" href="#125">125</a>                 <strong class="jxr_keyword">return</strong> type;
+<a name="126" href="#126">126</a>             }
+<a name="127" href="#127">127</a>         }
+<a name="128" href="#128">128</a> 
+<a name="129" href="#129">129</a>         <em class="jxr_comment">// Get type based on metadata hint (if available)</em>
+<a name="130" href="#130">130</a>         String typename = metadata.get(Metadata.CONTENT_TYPE);
+<a name="131" href="#131">131</a>         <strong class="jxr_keyword">if</strong> (typename != <strong class="jxr_keyword">null</strong>) {
+<a name="132" href="#132">132</a>             <strong class="jxr_keyword">try</strong> {
+<a name="133" href="#133">133</a>                 <strong class="jxr_keyword">return</strong> types.forName(typename);
+<a name="134" href="#134">134</a>             } <strong class="jxr_keyword">catch</strong> (MimeTypeException e) {
+<a name="135" href="#135">135</a>                 <em class="jxr_comment">// Malformed type name, ignore</em>
+<a name="136" href="#136">136</a>             }
+<a name="137" href="#137">137</a>         }
+<a name="138" href="#138">138</a> 
+<a name="139" href="#139">139</a>         <em class="jxr_comment">// Finally, use the default type if no matches found</em>
+<a name="140" href="#140">140</a>         <strong class="jxr_keyword">try</strong> {
+<a name="141" href="#141">141</a>             <strong class="jxr_keyword">return</strong> types.forName(MimeTypes.DEFAULT);
+<a name="142" href="#142">142</a>         } <strong class="jxr_keyword">catch</strong> (MimeTypeException e) {
+<a name="143" href="#143">143</a>             <em class="jxr_comment">// Should never happen</em>
+<a name="144" href="#144">144</a>             <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">null</strong>;
+<a name="145" href="#145">145</a>         }
+<a name="146" href="#146">146</a>     }
+<a name="147" href="#147">147</a> 
+<a name="148" href="#148">148</a>     <em class="jxr_javadoccomment">/**</em>
+<a name="149" href="#149">149</a> <em class="jxr_javadoccomment">     * Reads and returns the first &lt;code&gt;length&lt;/code&gt; bytes from the</em>
+<a name="150" href="#150">150</a> <em class="jxr_javadoccomment">     * given stream. If the stream ends before that, returns all bytes</em>
+<a name="151" href="#151">151</a> <em class="jxr_javadoccomment">     * from the stream.</em>
+<a name="152" href="#152">152</a> <em class="jxr_javadoccomment">     * </em>
+<a name="153" href="#153">153</a> <em class="jxr_javadoccomment">     * @param input input stream</em>
+<a name="154" href="#154">154</a> <em class="jxr_javadoccomment">     * @param length number of bytes to read and return</em>
+<a name="155" href="#155">155</a> <em class="jxr_javadoccomment">     * @return stream prefix</em>
+<a name="156" href="#156">156</a> <em class="jxr_javadoccomment">     * @throws IOException if the stream could not be read</em>
+<a name="157" href="#157">157</a> <em class="jxr_javadoccomment">     */</em>
+<a name="158" href="#158">158</a>     <strong class="jxr_keyword">private</strong> byte[] getPrefix(InputStream input, <strong class="jxr_keyword">int</strong> length) <strong class="jxr_keyword">throws</strong> IOException {
+<a name="159" href="#159">159</a>         ByteArrayOutputStream output = <strong class="jxr_keyword">new</strong> ByteArrayOutputStream();
+<a name="160" href="#160">160</a>         byte[] buffer = <strong class="jxr_keyword">new</strong> byte[Math.min(1024, length)];
+<a name="161" href="#161">161</a>         <strong class="jxr_keyword">int</strong> n = input.read(buffer);
+<a name="162" href="#162">162</a>         <strong class="jxr_keyword">while</strong> (n != -1) {
+<a name="163" href="#163">163</a>             output.write(buffer, 0, n);
+<a name="164" href="#164">164</a>             <strong class="jxr_keyword">int</strong> remaining = length - output.size();
+<a name="165" href="#165">165</a>             <strong class="jxr_keyword">if</strong> (remaining &gt; 0) {
+<a name="166" href="#166">166</a>                 n = input.read(buffer, 0, Math.min(buffer.length, remaining));
+<a name="167" href="#167">167</a>             } <strong class="jxr_keyword">else</strong> {
+<a name="168" href="#168">168</a>                 n = -1;
+<a name="169" href="#169">169</a>             }
+<a name="170" href="#170">170</a>         }
+<a name="171" href="#171">171</a>         <strong class="jxr_keyword">return</strong> output.toByteArray();
+<a name="172" href="#172">172</a>     }
+<a name="173" href="#173">173</a> 
+<a name="174" href="#174">174</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+

Added: incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,57 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>EmptyParser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/EmptyParser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_comment">/*</em>
+<a name="2" href="#2">2</a>   <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_comment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_comment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_comment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_comment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_comment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_comment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_comment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>  
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.sax.XHTMLContentHandler;
+<a name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="26" href="#26">26</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="27" href="#27">27</a>  
+<a name="28" href="#28">28</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="29" href="#29">29</a>  <em class="jxr_javadoccomment"> * Dummy parser that always produces an empty XHTML document without even</em>
+<a name="30" href="#30">30</a>  <em class="jxr_javadoccomment"> * attempting to parse the given document stream. Useful as a sentinel parser</em>
+<a name="31" href="#31">31</a>  <em class="jxr_javadoccomment"> * for unknown document types.</em>
+<a name="32" href="#32">32</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="33" href="#33">33</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/EmptyParser.html">EmptyParser</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="34" href="#34">34</a>  
+<a name="35" href="#35">35</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="36" href="#36">36</a>              InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="37" href="#37">37</a>              <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="38" href="#38">38</a>          <a href="../../../../org/apache/tika/sax/XHTMLContentHandler.html">XHTMLContentHandler</a> xhtml = <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/sax/XHTMLContentHandler.html">XHTMLContentHandler</a>(handler, metadata);
+<a name="39" href="#39">39</a>          xhtml.startDocument();
+<a name="40" href="#40">40</a>          xhtml.endDocument();
+<a name="41" href="#41">41</a>      }
+<a name="42" href="#42">42</a>  
+<a name="43" href="#43">43</a>  }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+

Added: incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,52 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>ErrorParser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/ErrorParser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_comment">/*</em>
+<a name="2" href="#2">2</a>   <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_comment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_comment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_comment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_comment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_comment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_comment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_comment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="20" href="#20">20</a>  
+<a name="21" href="#21">21</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="24" href="#24">24</a>  
+<a name="25" href="#25">25</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="26" href="#26">26</a>  <em class="jxr_javadoccomment"> * Dummy parser that always throws a {@link TikaException} without even</em>
+<a name="27" href="#27">27</a>  <em class="jxr_javadoccomment"> * attempting to parse the given document stream. Useful as a sentinel parser</em>
+<a name="28" href="#28">28</a>  <em class="jxr_javadoccomment"> * for unknown document types.</em>
+<a name="29" href="#29">29</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="30" href="#30">30</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/ErrorParser.html">ErrorParser</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="31" href="#31">31</a>  
+<a name="32" href="#32">32</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="33" href="#33">33</a>              InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="34" href="#34">34</a>              <strong class="jxr_keyword">throws</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="35" href="#35">35</a>          <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a>(<span class="jxr_string">"Parse error"</span>);
+<a name="36" href="#36">36</a>      }
+<a name="37" href="#37">37</a>  
+<a name="38" href="#38">38</a>  }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+

Added: incubator/tika/site/xref/org/apache/tika/parser/Parser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/Parser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/Parser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/Parser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,63 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>Parser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/Parser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a>   <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_javadoccomment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_javadoccomment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_javadoccomment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>  
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="26" href="#26">26</a>  
+<a name="27" href="#27">27</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="28" href="#28">28</a>  <em class="jxr_javadoccomment"> * Tika parser interface</em>
+<a name="29" href="#29">29</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="30" href="#30">30</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">interface</strong> <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="31" href="#31">31</a>  
+<a name="32" href="#32">32</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="33" href="#33">33</a>  <em class="jxr_javadoccomment">     * Parses a document stream into a sequence of XHTML SAX events.</em>
+<a name="34" href="#34">34</a>  <em class="jxr_javadoccomment">     * Fills in related document metadata in the given metadata object.</em>
+<a name="35" href="#35">35</a>  <em class="jxr_javadoccomment">     * &lt;p&gt;</em>
+<a name="36" href="#36">36</a>  <em class="jxr_javadoccomment">     * The given document stream is consumed but not closed by this method.</em>
+<a name="37" href="#37">37</a>  <em class="jxr_javadoccomment">     * The responsibility to close the stream remains with the caller.</em>
+<a name="38" href="#38">38</a>  <em class="jxr_javadoccomment">     *</em>
+<a name="39" href="#39">39</a>  <em class="jxr_javadoccomment">     * @param stream the document stream (input)</em>
+<a name="40" href="#40">40</a>  <em class="jxr_javadoccomment">     * @param handler handler for the XHTML SAX events (output)</em>
+<a name="41" href="#41">41</a>  <em class="jxr_javadoccomment">     * @param metadata document metadata (input and output)</em>
+<a name="42" href="#42">42</a>  <em class="jxr_javadoccomment">     * @throws IOException if the document stream could not be read</em>
+<a name="43" href="#43">43</a>  <em class="jxr_javadoccomment">     * @throws SAXException if the SAX events could not be processed</em>
+<a name="44" href="#44">44</a>  <em class="jxr_javadoccomment">     * @throws TikaException if the document could not be parsed</em>
+<a name="45" href="#45">45</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="46" href="#46">46</a>      <strong class="jxr_keyword">void</strong> parse(InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="47" href="#47">47</a>              <strong class="jxr_keyword">throws</strong> IOException, SAXException, TikaException;
+<a name="48" href="#48">48</a>  
+<a name="49" href="#49">49</a>  }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+

Added: incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,74 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>ParserDecorator xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/ParserDecorator.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a>   <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_javadoccomment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_javadoccomment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_javadoccomment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>  
+<a name="22" href="#22">22</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="26" href="#26">26</a>  
+<a name="27" href="#27">27</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="28" href="#28">28</a>  <em class="jxr_javadoccomment"> * Decorator base class for the {@link Parser} interface. This class</em>
+<a name="29" href="#29">29</a>  <em class="jxr_javadoccomment"> * simply delegates all parsing calls to an underlying decorated parser</em>
+<a name="30" href="#30">30</a>  <em class="jxr_javadoccomment"> * instance. Subclasses can provide extra decoration by overriding the</em>
+<a name="31" href="#31">31</a>  <em class="jxr_javadoccomment"> * parse method.</em>
+<a name="32" href="#32">32</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="33" href="#33">33</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/ParserDecorator.html">ParserDecorator</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="34" href="#34">34</a>  
+<a name="35" href="#35">35</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="36" href="#36">36</a>  <em class="jxr_javadoccomment">     * The decorated parser instance.</em>
+<a name="37" href="#37">37</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="38" href="#38">38</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser;
+<a name="39" href="#39">39</a>  
+<a name="40" href="#40">40</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="41" href="#41">41</a>  <em class="jxr_javadoccomment">     * Creates a decorator for the given parser.</em>
+<a name="42" href="#42">42</a>  <em class="jxr_javadoccomment">     *</em>
+<a name="43" href="#43">43</a>  <em class="jxr_javadoccomment">     * @param parser the parser instance to be decorated</em>
+<a name="44" href="#44">44</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="45" href="#45">45</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/ParserDecorator.html">ParserDecorator</a>(<a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser) {
+<a name="46" href="#46">46</a>          <strong class="jxr_keyword">this</strong>.parser = parser;
+<a name="47" href="#47">47</a>      }
+<a name="48" href="#48">48</a>  
+<a name="49" href="#49">49</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="50" href="#50">50</a>  <em class="jxr_javadoccomment">     * Delegates the method call to the decorated parser. Subclasses should</em>
+<a name="51" href="#51">51</a>  <em class="jxr_javadoccomment">     * override this method (and use &lt;code&gt;super.parse()&lt;/code&gt; to invoke</em>
+<a name="52" href="#52">52</a>  <em class="jxr_javadoccomment">     * the decorated parser) to implement extra decoration.</em>
+<a name="53" href="#53">53</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="54" href="#54">54</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="55" href="#55">55</a>              InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="56" href="#56">56</a>              <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="57" href="#57">57</a>          parser.parse(stream, handler, metadata);
+<a name="58" href="#58">58</a>      }
+<a name="59" href="#59">59</a>  
+<a name="60" href="#60">60</a>  }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+

Added: incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,96 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>ParserPostProcessor xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/ParserPostProcessor.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a>   <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a>   <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a>   <em class="jxr_javadoccomment"> * contributor license agreements.  See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a>   <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a>   <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a>   <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a>   <em class="jxr_javadoccomment"> * the License.  You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a>   <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a>   <em class="jxr_javadoccomment"> *     <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a>  <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a>  <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a>  <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a>  <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a>  <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a>  <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a>  <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>  
+<a name="19" href="#19">19</a>  <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a>  <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>  <strong class="jxr_keyword">import</strong> java.io.StringWriter;
+<a name="22" href="#22">22</a>  
+<a name="23" href="#23">23</a>  <strong class="jxr_keyword">import</strong> org.apache.oro.text.regex.MalformedPatternException;
+<a name="24" href="#24">24</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="25" href="#25">25</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="26" href="#26">26</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.sax.TeeContentHandler;
+<a name="27" href="#27">27</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.sax.WriteOutContentHandler;
+<a name="28" href="#28">28</a>  <strong class="jxr_keyword">import</strong> org.apache.tika.utils.RegexUtils;
+<a name="29" href="#29">29</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="30" href="#30">30</a>  <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="31" href="#31">31</a>  
+<a name="32" href="#32">32</a>  <em class="jxr_javadoccomment">/**</em>
+<a name="33" href="#33">33</a>  <em class="jxr_javadoccomment"> * Parser decorator that post-processes the results from a decorated parser.</em>
+<a name="34" href="#34">34</a>  <em class="jxr_javadoccomment"> * The post-processing stores the text output captured from the decorated</em>
+<a name="35" href="#35">35</a>  <em class="jxr_javadoccomment"> * parser as "fulltext", its first 500 characters as "summary", and any</em>
+<a name="36" href="#36">36</a>  <em class="jxr_javadoccomment"> * links matched in it as "outlinks" metadata. Exceptions thrown by the</em>
+<a name="37" href="#37">37</a>  <em class="jxr_javadoccomment"> * decorated parser are not caught; they propagate to the caller.</em>
+<a name="38" href="#38">38</a>  <em class="jxr_javadoccomment"> */</em>
+<a name="39" href="#39">39</a>  <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/ParserPostProcessor.html">ParserPostProcessor</a> <strong class="jxr_keyword">extends</strong> <a href="../../../../org/apache/tika/parser/ParserDecorator.html">ParserDecorator</a> {
+<a name="40" href="#40">40</a>  
+<a name="41" href="#41">41</a>      <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> String LINK_PATTERN =
+<a name="42" href="#42">42</a>          <span class="jxr_string">"([A-Za-z][A-Za-z0-9+.-]{1,120}:"</span>
+<a name="43" href="#43">43</a>          + <span class="jxr_string">"[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&amp;~=-])|%[A-Fa-f0-9]{2}){1,333}"</span>
+<a name="44" href="#44">44</a>          + <span class="jxr_string">"(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&amp;~=%-]{0,1000}))?)"</span>;
+<a name="45" href="#45">45</a>  
+<a name="46" href="#46">46</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="47" href="#47">47</a>  <em class="jxr_javadoccomment">     * Creates a post-processing decorator for the given parser.</em>
+<a name="48" href="#48">48</a>  <em class="jxr_javadoccomment">     *</em>
+<a name="49" href="#49">49</a>  <em class="jxr_javadoccomment">     * @param parser the parser to be decorated</em>
+<a name="50" href="#50">50</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="51" href="#51">51</a>      <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/ParserPostProcessor.html">ParserPostProcessor</a>(<a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser) {
+<a name="52" href="#52">52</a>          <strong class="jxr_keyword">super</strong>(parser);
+<a name="53" href="#53">53</a>      }
+<a name="54" href="#54">54</a>  
+<a name="55" href="#55">55</a>      <em class="jxr_javadoccomment">/**</em>
+<a name="56" href="#56">56</a>  <em class="jxr_javadoccomment">     * Forwards the call to the decorated parser and post-processes the</em>
+<a name="57" href="#57">57</a>  <em class="jxr_javadoccomment">     * results as described above.</em>
+<a name="58" href="#58">58</a>  <em class="jxr_javadoccomment">     */</em>
+<a name="59" href="#59">59</a>      <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="60" href="#60">60</a>              InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="61" href="#61">61</a>              <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="62" href="#62">62</a>          StringWriter writer = <strong class="jxr_keyword">new</strong> StringWriter();
+<a name="63" href="#63">63</a>          handler = <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/sax/TeeContentHandler.html">TeeContentHandler</a>(
+<a name="64" href="#64">64</a>                  handler, <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/sax/WriteOutContentHandler.html">WriteOutContentHandler</a>(writer));
+<a name="65" href="#65">65</a>          <strong class="jxr_keyword">super</strong>.parse(stream, handler, metadata);
+<a name="66" href="#66">66</a>  
+<a name="67" href="#67">67</a>          String content = writer.toString();
+<a name="68" href="#68">68</a>          metadata.set(<span class="jxr_string">"fulltext"</span>, content);
+<a name="69" href="#69">69</a>  
+<a name="70" href="#70">70</a>          <strong class="jxr_keyword">int</strong> length = Math.min(content.length(), 500);
+<a name="71" href="#71">71</a>          metadata.set(<span class="jxr_string">"summary"</span>, content.substring(0, length));
+<a name="72" href="#72">72</a>  
+<a name="73" href="#73">73</a>          <strong class="jxr_keyword">try</strong> {
+<a name="74" href="#74">74</a>              <strong class="jxr_keyword">for</strong> (String link : RegexUtils.extract(content, LINK_PATTERN)) {
+<a name="75" href="#75">75</a>                  metadata.add(<span class="jxr_string">"outlinks"</span>, link);
+<a name="76" href="#76">76</a>              }
+<a name="77" href="#77">77</a>          } <strong class="jxr_keyword">catch</strong> (MalformedPatternException e) {
+<a name="78" href="#78">78</a>              <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a>(<span class="jxr_string">"Malformed URL pattern"</span>, e);
+<a name="79" href="#79">79</a>          }
+<a name="80" href="#80">80</a>      }
+<a name="81" href="#81">81</a>  
+<a name="82" href="#82">82</a>  }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+