You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2007/11/18 23:21:13 UTC
svn commit: r596146 [30/36] - in /incubator/tika/site: ./ apidocs/
apidocs/org/ apidocs/org/apache/ apidocs/org/apache/tika/
apidocs/org/apache/tika/config/ apidocs/org/apache/tika/config/class-use/
apidocs/org/apache/tika/exception/ apidocs/org/apache...
Added: incubator/tika/site/xref/org/apache/tika/mime/Patterns.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/mime/Patterns.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/mime/Patterns.html (added)
+++ incubator/tika/site/xref/org/apache/tika/mime/Patterns.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,191 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>Patterns xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/mime/Patterns.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a> <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_javadoccomment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_javadoccomment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_javadoccomment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.mime;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <em class="jxr_comment">// JDK imports</em>
+<a name="20" href="#20">20</a> <strong class="jxr_keyword">import</strong> java.util.Comparator;
+<a name="21" href="#21">21</a> <strong class="jxr_keyword">import</strong> java.util.HashMap;
+<a name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> java.util.Map;
+<a name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> java.util.SortedMap;
+<a name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> java.util.TreeMap;
+<a name="25" href="#25">25</a>
+<a name="26" href="#26">26</a> <em class="jxr_javadoccomment">/**</em>
+<a name="27" href="#27">27</a> <em class="jxr_javadoccomment"> * Defines a MimeType pattern.</em>
+<a name="28" href="#28">28</a> <em class="jxr_javadoccomment"> */</em>
+<a name="29" href="#29">29</a> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/mime/Patterns.html">Patterns</a> {
+<a name="30" href="#30">30</a>
+<a name="31" href="#31">31</a> <em class="jxr_javadoccomment">/**</em>
+<a name="32" href="#32">32</a> <em class="jxr_javadoccomment"> * Index of exact name patterns.</em>
+<a name="33" href="#33">33</a> <em class="jxr_javadoccomment"> */</em>
+<a name="34" href="#34">34</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> Map<String, MimeType> names = <strong class="jxr_keyword">new</strong> HashMap<String, MimeType>();
+<a name="35" href="#35">35</a>
+<a name="36" href="#36">36</a> <em class="jxr_javadoccomment">/**</em>
+<a name="37" href="#37">37</a> <em class="jxr_javadoccomment"> * Index of extension patterns of the form "*extension".</em>
+<a name="38" href="#38">38</a> <em class="jxr_javadoccomment"> */</em>
+<a name="39" href="#39">39</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> Map<String, MimeType> extensions =
+<a name="40" href="#40">40</a> <strong class="jxr_keyword">new</strong> HashMap<String, MimeType>();
+<a name="41" href="#41">41</a>
+<a name="42" href="#42">42</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">int</strong> minExtensionLength = Integer.MAX_VALUE;
+<a name="43" href="#43">43</a>
+<a name="44" href="#44">44</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">int</strong> maxExtensionLength = 0;
+<a name="45" href="#45">45</a>
+<a name="46" href="#46">46</a> <em class="jxr_javadoccomment">/**</em>
+<a name="47" href="#47">47</a> <em class="jxr_javadoccomment"> * Index of generic glob patterns, sorted by length.</em>
+<a name="48" href="#48">48</a> <em class="jxr_javadoccomment"> */</em>
+<a name="49" href="#49">49</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> SortedMap<String, MimeType> globs =
+<a name="50" href="#50">50</a> <strong class="jxr_keyword">new</strong> TreeMap<String, MimeType>(<strong class="jxr_keyword">new</strong> Comparator<String>() {
+<a name="51" href="#51">51</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">int</strong> compare(String a, String b) {
+<a name="52" href="#52">52</a> <strong class="jxr_keyword">int</strong> diff = b.length() - a.length();
+<a name="53" href="#53">53</a> <strong class="jxr_keyword">if</strong> (diff == 0) {
+<a name="54" href="#54">54</a> diff = a.compareTo(b);
+<a name="55" href="#55">55</a> }
+<a name="56" href="#56">56</a> <strong class="jxr_keyword">return</strong> diff;
+<a name="57" href="#57">57</a> }
+<a name="58" href="#58">58</a> });
+<a name="59" href="#59">59</a>
+<a name="60" href="#60">60</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> add(String pattern, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type) <strong class="jxr_keyword">throws</strong> MimeTypeException {
+<a name="61" href="#61">61</a> assert pattern != <strong class="jxr_keyword">null</strong> && type != <strong class="jxr_keyword">null</strong>;
+<a name="62" href="#62">62</a>
+<a name="63" href="#63">63</a> <strong class="jxr_keyword">if</strong> (pattern.indexOf('*') == -1
+<a name="64" href="#64">64</a> && pattern.indexOf('?') == -1
+<a name="65" href="#65">65</a> && pattern.indexOf('[') == -1) {
+<a name="66" href="#66">66</a> addName(pattern, type);
+<a name="67" href="#67">67</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (pattern.startsWith(<span class="jxr_string">"*"</span>)
+<a name="68" href="#68">68</a> && pattern.indexOf('*', 1) == -1
+<a name="69" href="#69">69</a> && pattern.indexOf('?') == -1
+<a name="70" href="#70">70</a> && pattern.indexOf('[') == -1) {
+<a name="71" href="#71">71</a> addExtension(pattern.substring(1), type);
+<a name="72" href="#72">72</a> } <strong class="jxr_keyword">else</strong> {
+<a name="73" href="#73">73</a> addGlob(compile(pattern), type);
+<a name="74" href="#74">74</a> }
+<a name="75" href="#75">75</a> }
+<a name="76" href="#76">76</a>
+<a name="77" href="#77">77</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">void</strong> addName(String name, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type) <strong class="jxr_keyword">throws</strong> MimeTypeException {
+<a name="78" href="#78">78</a> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> previous = names.get(name);
+<a name="79" href="#79">79</a> <strong class="jxr_keyword">if</strong> (previous == <strong class="jxr_keyword">null</strong> || previous.isDescendantOf(type)) {
+<a name="80" href="#80">80</a> names.put(name, type);
+<a name="81" href="#81">81</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (previous == type || type.isDescendantOf(previous)) {
+<a name="82" href="#82">82</a> <em class="jxr_comment">// do nothing</em>
+<a name="83" href="#83">83</a> } <strong class="jxr_keyword">else</strong> {
+<a name="84" href="#84">84</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a>(<span class="jxr_string">"Conflicting name pattern: "</span> + name);
+<a name="85" href="#85">85</a> }
+<a name="86" href="#86">86</a> }
+<a name="87" href="#87">87</a>
+<a name="88" href="#88">88</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">void</strong> addExtension(String extension, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type)
+<a name="89" href="#89">89</a> <strong class="jxr_keyword">throws</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a> {
+<a name="90" href="#90">90</a> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> previous = extensions.get(extension);
+<a name="91" href="#91">91</a> <strong class="jxr_keyword">if</strong> (previous == <strong class="jxr_keyword">null</strong> || previous.isDescendantOf(type)) {
+<a name="92" href="#92">92</a> extensions.put(extension, type);
+<a name="93" href="#93">93</a> <strong class="jxr_keyword">int</strong> length = extension.length();
+<a name="94" href="#94">94</a> minExtensionLength = Math.min(minExtensionLength, length);
+<a name="95" href="#95">95</a> maxExtensionLength = Math.max(maxExtensionLength, length);
+<a name="96" href="#96">96</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (previous == type || type.isDescendantOf(previous)) {
+<a name="97" href="#97">97</a> <em class="jxr_comment">// do nothing</em>
+<a name="98" href="#98">98</a> } <strong class="jxr_keyword">else</strong> {
+<a name="99" href="#99">99</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a>(
+<a name="100" href="#100">100</a> <span class="jxr_string">"Conflicting extension pattern: "</span> + extension);
+<a name="101" href="#101">101</a> }
+<a name="102" href="#102">102</a> }
+<a name="103" href="#103">103</a>
+<a name="104" href="#104">104</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">void</strong> addGlob(String glob, <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type)
+<a name="105" href="#105">105</a> <strong class="jxr_keyword">throws</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a> {
+<a name="106" href="#106">106</a> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> previous = globs.get(glob);
+<a name="107" href="#107">107</a> <strong class="jxr_keyword">if</strong> (previous == <strong class="jxr_keyword">null</strong> || previous.isDescendantOf(type)) {
+<a name="108" href="#108">108</a> globs.put(glob, type);
+<a name="109" href="#109">109</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (previous == type || type.isDescendantOf(previous)) {
+<a name="110" href="#110">110</a> <em class="jxr_comment">// do nothing</em>
+<a name="111" href="#111">111</a> } <strong class="jxr_keyword">else</strong> {
+<a name="112" href="#112">112</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/mime/MimeTypeException.html">MimeTypeException</a>(<span class="jxr_string">"Conflicting glob pattern: "</span> + glob);
+<a name="113" href="#113">113</a> }
+<a name="114" href="#114">114</a> }
+<a name="115" href="#115">115</a>
+<a name="116" href="#116">116</a> <em class="jxr_javadoccomment">/**</em>
+<a name="117" href="#117">117</a> <em class="jxr_javadoccomment"> * Find the MimeType corresponding to a resource name.</em>
+<a name="118" href="#118">118</a> <em class="jxr_javadoccomment"> * </em>
+<a name="119" href="#119">119</a> <em class="jxr_javadoccomment"> * It applies the recommendations detailed in FreeDesktop Shared MIME-info</em>
+<a name="120" href="#120">120</a> <em class="jxr_javadoccomment"> * Database for guessing MimeType from a resource name: It first tries a</em>
+<a name="121" href="#121">121</a> <em class="jxr_javadoccomment"> * case-sensitive match, then tries again with the resource name converted to</em>
+<a name="122" href="#122">122</a> <em class="jxr_javadoccomment"> * lower-case if that fails. If several patterns match then the longest</em>
+<a name="123" href="#123">123</a> <em class="jxr_javadoccomment"> * pattern is used. In particular, files with multiple extensions (such as</em>
+<a name="124" href="#124">124</a> <em class="jxr_javadoccomment"> * Data.tar.gz) match the longest sequence of extensions (eg '*.tar.gz' in</em>
+<a name="125" href="#125">125</a> <em class="jxr_javadoccomment"> * preference to '*.gz'). Literal patterns (eg, 'Makefile') are matched</em>
+<a name="126" href="#126">126</a> <em class="jxr_javadoccomment"> * before all others. Patterns beginning with `*.' and containing no other</em>
+<a name="127" href="#127">127</a> <em class="jxr_javadoccomment"> * special characters (`*?[') are matched before other wildcarded patterns</em>
+<a name="128" href="#128">128</a> <em class="jxr_javadoccomment"> * (since this covers the majority of the patterns).</em>
+<a name="129" href="#129">129</a> <em class="jxr_javadoccomment"> */</em>
+<a name="130" href="#130">130</a> <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> matches(String name) {
+<a name="131" href="#131">131</a> assert name != <strong class="jxr_keyword">null</strong>;
+<a name="132" href="#132">132</a>
+<a name="133" href="#133">133</a> <em class="jxr_comment">// First, try exact match of the provided resource name</em>
+<a name="134" href="#134">134</a> <strong class="jxr_keyword">if</strong> (names.containsKey(name)) {
+<a name="135" href="#135">135</a> <strong class="jxr_keyword">return</strong> names.get(name);
+<a name="136" href="#136">136</a> }
+<a name="137" href="#137">137</a>
+<a name="138" href="#138">138</a> <em class="jxr_comment">// Then try "extension" (*.xxx) matching</em>
+<a name="139" href="#139">139</a> <strong class="jxr_keyword">int</strong> maxLength = Math.min(maxExtensionLength, name.length());
+<a name="140" href="#140">140</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> n = maxLength; n >= minExtensionLength; n--) {
+<a name="141" href="#141">141</a> String extension = name.substring(name.length() - n);
+<a name="142" href="#142">142</a> <strong class="jxr_keyword">if</strong> (extensions.containsKey(extension)) {
+<a name="143" href="#143">143</a> <strong class="jxr_keyword">return</strong> extensions.get(extension);
+<a name="144" href="#144">144</a> }
+<a name="145" href="#145">145</a> }
+<a name="146" href="#146">146</a>
+<a name="147" href="#147">147</a> <em class="jxr_comment">// And finally, try complex regexp matching</em>
+<a name="148" href="#148">148</a> <strong class="jxr_keyword">for</strong> (Map.Entry<String, MimeType> entry : globs.entrySet()) {
+<a name="149" href="#149">149</a> <strong class="jxr_keyword">if</strong> (name.matches(entry.getKey())) {
+<a name="150" href="#150">150</a> <strong class="jxr_keyword">return</strong> entry.getValue();
+<a name="151" href="#151">151</a> }
+<a name="152" href="#152">152</a> }
+<a name="153" href="#153">153</a>
+<a name="154" href="#154">154</a> <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">null</strong>;
+<a name="155" href="#155">155</a> }
+<a name="156" href="#156">156</a>
+<a name="157" href="#157">157</a> <strong class="jxr_keyword">private</strong> String compile(String glob) {
+<a name="158" href="#158">158</a> StringBuilder pattern = <strong class="jxr_keyword">new</strong> StringBuilder();
+<a name="159" href="#159">159</a> pattern.append(<span class="jxr_string">"\\A"</span>);
+<a name="160" href="#160">160</a> <strong class="jxr_keyword">for</strong> (<strong class="jxr_keyword">int</strong> i = 0; i < glob.length(); i++) {
+<a name="161" href="#161">161</a> <strong class="jxr_keyword">char</strong> ch = glob.charAt(i);
+<a name="162" href="#162">162</a> <strong class="jxr_keyword">if</strong> (ch == '?') {
+<a name="163" href="#163">163</a> pattern.append('.');
+<a name="164" href="#164">164</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (ch == '*') {
+<a name="165" href="#165">165</a> pattern.append(<span class="jxr_string">".*"</span>);
+<a name="166" href="#166">166</a> } <strong class="jxr_keyword">else</strong> <strong class="jxr_keyword">if</strong> (<span class="jxr_string">"\\[]^.-$+(){}|"</span>.indexOf(ch) != -1) {
+<a name="167" href="#167">167</a> pattern.append('\\');
+<a name="168" href="#168">168</a> pattern.append(ch);
+<a name="169" href="#169">169</a> } <strong class="jxr_keyword">else</strong> {
+<a name="170" href="#170">170</a> pattern.append(ch);
+<a name="171" href="#171">171</a> }
+<a name="172" href="#172">172</a> }
+<a name="173" href="#173">173</a> pattern.append(<span class="jxr_string">"\\z"</span>);
+<a name="174" href="#174">174</a> <strong class="jxr_keyword">return</strong> pattern.toString();
+<a name="175" href="#175">175</a> }
+<a name="176" href="#176">176</a>
+<a name="177" href="#177">177</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+
Added: incubator/tika/site/xref/org/apache/tika/mime/package-frame.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/mime/package-frame.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/mime/package-frame.html (added)
+++ incubator/tika/site/xref/org/apache/tika/mime/package-frame.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,72 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+ <title>Apache Tika 0.1-SNAPSHOT Reference Package org.apache.tika.mime</title>
+ <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="style" />
+ </head>
+ <body>
+
+ <h3>
+ <a href="package-summary.html" target="classFrame">org.apache.tika.mime</a>
+ </h3>
+
+ <h3>Classes</h3>
+
+ <ul>
+ <li>
+ <a href="Operator.html" target="classFrame">And</a>
+ </li>
+ <li>
+ <a href="Clause.html" target="classFrame">Clause</a>
+ </li>
+ <li>
+ <a href="Clause.html" target="classFrame">False</a>
+ </li>
+ <li>
+ <a href="HexCoDec.html" target="classFrame">HexCoDec</a>
+ </li>
+ <li>
+ <a href="Magic.html" target="classFrame">Magic</a>
+ </li>
+ <li>
+ <a href="MagicClause.html" target="classFrame">MagicClause</a>
+ </li>
+ <li>
+ <a href="MagicMatch.html" target="classFrame">MagicMatch</a>
+ </li>
+ <li>
+ <a href="MimeType.html" target="classFrame">MimeType</a>
+ </li>
+ <li>
+ <a href="MimeTypeException.html" target="classFrame">MimeTypeException</a>
+ </li>
+ <li>
+ <a href="MimeTypes.html" target="classFrame">MimeTypes</a>
+ </li>
+ <li>
+ <a href="MimeTypesFactory.html" target="classFrame">MimeTypesFactory</a>
+ </li>
+ <li>
+ <a href="MimeTypesReader.html" target="classFrame">MimeTypesReader</a>
+ </li>
+ <li>
+ <a href="Operator.html" target="classFrame">Operator</a>
+ </li>
+ <li>
+ <a href="Operator.html" target="classFrame">Or</a>
+ </li>
+ <li>
+ <a href="Patterns.html" target="classFrame">Patterns</a>
+ </li>
+ <li>
+ <a href="MimeType.html" target="classFrame">RootXML</a>
+ </li>
+ <li>
+ <a href="Clause.html" target="classFrame">True</a>
+ </li>
+ </ul>
+
+ </body>
+</html>
\ No newline at end of file
Added: incubator/tika/site/xref/org/apache/tika/mime/package-summary.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/mime/package-summary.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/mime/package-summary.html (added)
+++ incubator/tika/site/xref/org/apache/tika/mime/package-summary.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,147 @@
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "DTD/xhtml1-transitional.dtd">
+<html xml:lang="en" lang="en">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+ <title>Apache Tika 0.1-SNAPSHOT Reference Package org.apache.tika.mime</title>
+ <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="style" />
+ </head>
+ <body>
+ <div class="overview">
+ <ul>
+ <li>
+ <a href="../../../../overview-summary.html">Overview</a>
+ </li>
+ <li class="selected">Package</li>
+ </ul>
+ </div>
+ <div class="framenoframe">
+ <ul>
+ <li>
+ <a href="../../../../index.html" target="_top">FRAMES</a>
+ </li>
+ <li>
+ <a href="package-summary.html" target="_top">NO FRAMES</a>
+ </li>
+ </ul>
+ </div>
+
+ <h2>Package org.apache.tika.mime</h2>
+
+ <table class="summary">
+ <thead>
+ <tr>
+ <th>Class Summary</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>
+ <a href="Operator.html" target="classFrame">And</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Clause.html" target="classFrame">Clause</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Clause.html" target="classFrame">False</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="HexCoDec.html" target="classFrame">HexCoDec</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Magic.html" target="classFrame">Magic</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MagicClause.html" target="classFrame">MagicClause</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MagicMatch.html" target="classFrame">MagicMatch</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MimeType.html" target="classFrame">MimeType</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MimeTypeException.html" target="classFrame">MimeTypeException</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MimeTypes.html" target="classFrame">MimeTypes</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MimeTypesFactory.html" target="classFrame">MimeTypesFactory</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MimeTypesReader.html" target="classFrame">MimeTypesReader</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Operator.html" target="classFrame">Operator</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Operator.html" target="classFrame">Or</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Patterns.html" target="classFrame">Patterns</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="MimeType.html" target="classFrame">RootXML</a>
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="Clause.html" target="classFrame">True</a>
+ </td>
+ </tr>
+ </tbody>
+ </table>
+
+ <div class="overview">
+ <ul>
+ <li>
+ <a href="../../../../overview-summary.html">Overview</a>
+ </li>
+ <li class="selected">Package</li>
+ </ul>
+ </div>
+ <div class="framenoframe">
+ <ul>
+ <li>
+ <a href="../../../../index.html" target="_top">FRAMES</a>
+ </li>
+ <li>
+ <a href="package-summary.html" target="_top">NO FRAMES</a>
+ </li>
+ </ul>
+ </div>
+ <hr />
+ Copyright © 2007 The Apache Software Foundation. All Rights Reserved.
+ </body>
+</html>
\ No newline at end of file
Added: incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/AutoDetectParser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,188 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>AutoDetectParser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/AutoDetectParser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a> <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_javadoccomment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_javadoccomment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_javadoccomment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <strong class="jxr_keyword">import</strong> java.io.BufferedInputStream;
+<a name="20" href="#20">20</a> <strong class="jxr_keyword">import</strong> java.io.ByteArrayOutputStream;
+<a name="21" href="#21">21</a> <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="23" href="#23">23</a>
+<a name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> org.apache.tika.config.TikaConfig;
+<a name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="26" href="#26">26</a> <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="27" href="#27">27</a> <strong class="jxr_keyword">import</strong> org.apache.tika.mime.MimeType;
+<a name="28" href="#28">28</a> <strong class="jxr_keyword">import</strong> org.apache.tika.mime.MimeTypeException;
+<a name="29" href="#29">29</a> <strong class="jxr_keyword">import</strong> org.apache.tika.mime.MimeTypes;
+<a name="30" href="#30">30</a> <strong class="jxr_keyword">import</strong> org.jdom.JDOMException;
+<a name="31" href="#31">31</a> <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="32" href="#32">32</a> <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="33" href="#33">33</a>
+<a name="34" href="#34">34</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/AutoDetectParser.html">AutoDetectParser</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="35" href="#35">35</a>
+<a name="36" href="#36">36</a> <strong class="jxr_keyword">private</strong> <a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> config;
+<a name="37" href="#37">37</a>
+<a name="38" href="#38">38</a> <em class="jxr_javadoccomment">/**</em>
+<a name="39" href="#39">39</a> <em class="jxr_javadoccomment"> * Creates an auto-detecting parser instance using the default Tika</em>
+<a name="40" href="#40">40</a> <em class="jxr_javadoccomment"> * configuration.</em>
+<a name="41" href="#41">41</a> <em class="jxr_javadoccomment"> */</em>
+<a name="42" href="#42">42</a> <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/AutoDetectParser.html">AutoDetectParser</a>() {
+<a name="43" href="#43">43</a> <strong class="jxr_keyword">try</strong> {
+<a name="44" href="#44">44</a> config = TikaConfig.getDefaultConfig();
+<a name="45" href="#45">45</a> } <strong class="jxr_keyword">catch</strong> (IOException e) {
+<a name="46" href="#46">46</a> <em class="jxr_comment">// FIXME: This should never happen</em>
+<a name="47" href="#47">47</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> RuntimeException(e);
+<a name="48" href="#48">48</a> } <strong class="jxr_keyword">catch</strong> (JDOMException e) {
+<a name="49" href="#49">49</a> <em class="jxr_comment">// FIXME: This should never happen</em>
+<a name="50" href="#50">50</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> RuntimeException(e);
+<a name="51" href="#51">51</a> }
+<a name="52" href="#52">52</a> }
+<a name="53" href="#53">53</a>
+<a name="54" href="#54">54</a> <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/AutoDetectParser.html">AutoDetectParser</a>(<a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> config) {
+<a name="55" href="#55">55</a> <strong class="jxr_keyword">this</strong>.config = config;
+<a name="56" href="#56">56</a> }
+<a name="57" href="#57">57</a>
+<a name="58" href="#58">58</a> <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> getConfig() {
+<a name="59" href="#59">59</a> <strong class="jxr_keyword">return</strong> config;
+<a name="60" href="#60">60</a> }
+<a name="61" href="#61">61</a>
+<a name="62" href="#62">62</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> setConfig(<a href="../../../../org/apache/tika/config/TikaConfig.html">TikaConfig</a> config) {
+<a name="63" href="#63">63</a> <strong class="jxr_keyword">this</strong>.config = config;
+<a name="64" href="#64">64</a> }
+<a name="65" href="#65">65</a>
+<a name="66" href="#66">66</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="67" href="#67">67</a> InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="68" href="#68">68</a> <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="69" href="#69">69</a> <em class="jxr_comment">// We need buffering to enable MIME magic detection before parsing</em>
+<a name="70" href="#70">70</a> <strong class="jxr_keyword">if</strong> (!stream.markSupported()) {
+<a name="71" href="#71">71</a> stream = <strong class="jxr_keyword">new</strong> BufferedInputStream(stream);
+<a name="72" href="#72">72</a> }
+<a name="73" href="#73">73</a>
+<a name="74" href="#74">74</a> <em class="jxr_comment">// Automatically detect the MIME type of the document </em>
+<a name="75" href="#75">75</a> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type = getMimeType(stream, metadata);
+<a name="76" href="#76">76</a> metadata.set(Metadata.CONTENT_TYPE, type.getName());
+<a name="77" href="#77">77</a>
+<a name="78" href="#78">78</a> <em class="jxr_comment">// Get the parser configured for the detected MIME type</em>
+<a name="79" href="#79">79</a> <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser = config.getParser(type.getName());
+<a name="80" href="#80">80</a> <strong class="jxr_keyword">if</strong> (parser == <strong class="jxr_keyword">null</strong>) {
+<a name="81" href="#81">81</a> parser = config.getParser(MimeTypes.DEFAULT);
+<a name="82" href="#82">82</a> }
+<a name="83" href="#83">83</a> <strong class="jxr_keyword">if</strong> (parser == <strong class="jxr_keyword">null</strong>) {
+<a name="84" href="#84">84</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a>(<span class="jxr_string">"No parsers available: "</span> + type.getName());
+<a name="85" href="#85">85</a> }
+<a name="86" href="#86">86</a>
+<a name="87" href="#87">87</a> <em class="jxr_comment">// Parse the document</em>
+<a name="88" href="#88">88</a> parser.parse(stream, handler, metadata);
+<a name="89" href="#89">89</a> }
+<a name="90" href="#90">90</a>
+<a name="91" href="#91">91</a> <em class="jxr_javadoccomment">/**</em>
+<a name="92" href="#92">92</a> <em class="jxr_javadoccomment"> * Automatically detects the MIME type of a document based on magic</em>
+<a name="93" href="#93">93</a> <em class="jxr_javadoccomment"> * markers in the stream prefix and any given metadata hints.</em>
+<a name="94" href="#94">94</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a name="95" href="#95">95</a> <em class="jxr_javadoccomment"> * The given stream is expected to support marks, so that this method</em>
+<a name="96" href="#96">96</a> <em class="jxr_javadoccomment"> * can reset the stream to the position it was in before this method</em>
+<a name="97" href="#97">97</a> <em class="jxr_javadoccomment"> * was called.</em>
+<a name="98" href="#98">98</a> <em class="jxr_javadoccomment"> *</em>
+<a name="99" href="#99">99</a> <em class="jxr_javadoccomment"> * @param stream document stream</em>
+<a name="100" href="#100">100</a> <em class="jxr_javadoccomment"> * @param metadata metadata hints</em>
+<a name="101" href="#101">101</a> <em class="jxr_javadoccomment"> * @return MIME type of the document</em>
+<a name="102" href="#102">102</a> <em class="jxr_javadoccomment"> * @throws IOException if the document stream could not be read</em>
+<a name="103" href="#103">103</a> <em class="jxr_javadoccomment"> */</em>
+<a name="104" href="#104">104</a> <strong class="jxr_keyword">private</strong> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> getMimeType(InputStream stream, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="105" href="#105">105</a> <strong class="jxr_keyword">throws</strong> IOException {
+<a name="106" href="#106">106</a> <a href="../../../../org/apache/tika/mime/MimeTypes.html">MimeTypes</a> types = config.getMimeRepository();
+<a name="107" href="#107">107</a>
+<a name="108" href="#108">108</a> <em class="jxr_comment">// Get type based on magic prefix</em>
+<a name="109" href="#109">109</a> stream.mark(types.getMinLength());
+<a name="110" href="#110">110</a> <strong class="jxr_keyword">try</strong> {
+<a name="111" href="#111">111</a> byte[] prefix = getPrefix(stream, types.getMinLength());
+<a name="112" href="#112">112</a> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type = types.getMimeType(prefix);
+<a name="113" href="#113">113</a> <strong class="jxr_keyword">if</strong> (type != <strong class="jxr_keyword">null</strong>) {
+<a name="114" href="#114">114</a> <strong class="jxr_keyword">return</strong> type;
+<a name="115" href="#115">115</a> }
+<a name="116" href="#116">116</a> } <strong class="jxr_keyword">finally</strong> {
+<a name="117" href="#117">117</a> stream.reset();
+<a name="118" href="#118">118</a> }
+<a name="119" href="#119">119</a>
+<a name="120" href="#120">120</a> <em class="jxr_comment">// Get type based on resourceName hint (if available)</em>
+<a name="121" href="#121">121</a> String resourceName = metadata.get(Metadata.RESOURCE_NAME_KEY);
+<a name="122" href="#122">122</a> <strong class="jxr_keyword">if</strong> (resourceName != <strong class="jxr_keyword">null</strong>) {
+<a name="123" href="#123">123</a> <a href="../../../../org/apache/tika/mime/MimeType.html">MimeType</a> type = types.getMimeType(resourceName);
+<a name="124" href="#124">124</a> <strong class="jxr_keyword">if</strong> (type != <strong class="jxr_keyword">null</strong>) {
+<a name="125" href="#125">125</a> <strong class="jxr_keyword">return</strong> type;
+<a name="126" href="#126">126</a> }
+<a name="127" href="#127">127</a> }
+<a name="128" href="#128">128</a>
+<a name="129" href="#129">129</a> <em class="jxr_comment">// Get type based on metadata hint (if available)</em>
+<a name="130" href="#130">130</a> String typename = metadata.get(Metadata.CONTENT_TYPE);
+<a name="131" href="#131">131</a> <strong class="jxr_keyword">if</strong> (typename != <strong class="jxr_keyword">null</strong>) {
+<a name="132" href="#132">132</a> <strong class="jxr_keyword">try</strong> {
+<a name="133" href="#133">133</a> <strong class="jxr_keyword">return</strong> types.forName(typename);
+<a name="134" href="#134">134</a> } <strong class="jxr_keyword">catch</strong> (MimeTypeException e) {
+<a name="135" href="#135">135</a> <em class="jxr_comment">// Malformed type name, ignore</em>
+<a name="136" href="#136">136</a> }
+<a name="137" href="#137">137</a> }
+<a name="138" href="#138">138</a>
+<a name="139" href="#139">139</a> <em class="jxr_comment">// Finally, use the default type if no matches found</em>
+<a name="140" href="#140">140</a> <strong class="jxr_keyword">try</strong> {
+<a name="141" href="#141">141</a> <strong class="jxr_keyword">return</strong> types.forName(MimeTypes.DEFAULT);
+<a name="142" href="#142">142</a> } <strong class="jxr_keyword">catch</strong> (MimeTypeException e) {
+<a name="143" href="#143">143</a> <em class="jxr_comment">// Should never happen</em>
+<a name="144" href="#144">144</a> <strong class="jxr_keyword">return</strong> <strong class="jxr_keyword">null</strong>;
+<a name="145" href="#145">145</a> }
+<a name="146" href="#146">146</a> }
+<a name="147" href="#147">147</a>
+<a name="148" href="#148">148</a> <em class="jxr_javadoccomment">/**</em>
+<a name="149" href="#149">149</a> <em class="jxr_javadoccomment"> * Reads and returns the first &lt;code&gt;length&lt;/code&gt; bytes from the</em>
+<a name="150" href="#150">150</a> <em class="jxr_javadoccomment"> * given stream. If the stream ends before that, returns all bytes</em>
+<a name="151" href="#151">151</a> <em class="jxr_javadoccomment"> * from the stream.</em>
+<a name="152" href="#152">152</a> <em class="jxr_javadoccomment"> * </em>
+<a name="153" href="#153">153</a> <em class="jxr_javadoccomment"> * @param input input stream</em>
+<a name="154" href="#154">154</a> <em class="jxr_javadoccomment"> * @param length number of bytes to read and return</em>
+<a name="155" href="#155">155</a> <em class="jxr_javadoccomment"> * @return stream prefix</em>
+<a name="156" href="#156">156</a> <em class="jxr_javadoccomment"> * @throws IOException if the stream could not be read</em>
+<a name="157" href="#157">157</a> <em class="jxr_javadoccomment"> */</em>
+<a name="158" href="#158">158</a> <strong class="jxr_keyword">private</strong> byte[] getPrefix(InputStream input, <strong class="jxr_keyword">int</strong> length) <strong class="jxr_keyword">throws</strong> IOException {
+<a name="159" href="#159">159</a> ByteArrayOutputStream output = <strong class="jxr_keyword">new</strong> ByteArrayOutputStream();
+<a name="160" href="#160">160</a> byte[] buffer = <strong class="jxr_keyword">new</strong> byte[Math.min(1024, length)];
+<a name="161" href="#161">161</a> <strong class="jxr_keyword">int</strong> n = input.read(buffer);
+<a name="162" href="#162">162</a> <strong class="jxr_keyword">while</strong> (n != -1) {
+<a name="163" href="#163">163</a> output.write(buffer, 0, n);
+<a name="164" href="#164">164</a> <strong class="jxr_keyword">int</strong> remaining = length - output.size();
+<a name="165" href="#165">165</a> <strong class="jxr_keyword">if</strong> (remaining > 0) {
+<a name="166" href="#166">166</a> n = input.read(buffer, 0, Math.min(buffer.length, remaining));
+<a name="167" href="#167">167</a> } <strong class="jxr_keyword">else</strong> {
+<a name="168" href="#168">168</a> n = -1;
+<a name="169" href="#169">169</a> }
+<a name="170" href="#170">170</a> }
+<a name="171" href="#171">171</a> <strong class="jxr_keyword">return</strong> output.toByteArray();
+<a name="172" href="#172">172</a> }
+<a name="173" href="#173">173</a>
+<a name="174" href="#174">174</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+
Added: incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/EmptyParser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,57 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>EmptyParser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/EmptyParser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_comment">/*</em>
+<a name="2" href="#2">2</a> <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_comment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_comment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_comment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_comment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_comment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_comment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_comment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a> <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>
+<a name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> org.apache.tika.sax.XHTMLContentHandler;
+<a name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="26" href="#26">26</a> <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="27" href="#27">27</a>
+<a name="28" href="#28">28</a> <em class="jxr_javadoccomment">/**</em>
+<a name="29" href="#29">29</a> <em class="jxr_javadoccomment"> * Dummy parser that always produces an empty XHTML document without even</em>
+<a name="30" href="#30">30</a> <em class="jxr_javadoccomment"> * attempting to parse the given document stream. Useful as a sentinel parser</em>
+<a name="31" href="#31">31</a> <em class="jxr_javadoccomment"> * for unknown document types.</em>
+<a name="32" href="#32">32</a> <em class="jxr_javadoccomment"> */</em>
+<a name="33" href="#33">33</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/EmptyParser.html">EmptyParser</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="34" href="#34">34</a>
+<a name="35" href="#35">35</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="36" href="#36">36</a> InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="37" href="#37">37</a> <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="38" href="#38">38</a> <a href="../../../../org/apache/tika/sax/XHTMLContentHandler.html">XHTMLContentHandler</a> xhtml = <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/sax/XHTMLContentHandler.html">XHTMLContentHandler</a>(handler, metadata);
+<a name="39" href="#39">39</a> xhtml.startDocument();
+<a name="40" href="#40">40</a> xhtml.endDocument();
+<a name="41" href="#41">41</a> }
+<a name="42" href="#42">42</a>
+<a name="43" href="#43">43</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+
Added: incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/ErrorParser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,52 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>ErrorParser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/ErrorParser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_comment">/*</em>
+<a name="2" href="#2">2</a> <em class="jxr_comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_comment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_comment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_comment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_comment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_comment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_comment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_comment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_comment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_comment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_comment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_comment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_comment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="20" href="#20">20</a>
+<a name="21" href="#21">21</a> <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="24" href="#24">24</a>
+<a name="25" href="#25">25</a> <em class="jxr_javadoccomment">/**</em>
+<a name="26" href="#26">26</a> <em class="jxr_javadoccomment"> * Dummy parser that always throws a {@link TikaException} without even</em>
+<a name="27" href="#27">27</a> <em class="jxr_javadoccomment"> * attempting to parse the given document stream. Useful as a sentinel parser</em>
+<a name="28" href="#28">28</a> <em class="jxr_javadoccomment"> * for unknown document types.</em>
+<a name="29" href="#29">29</a> <em class="jxr_javadoccomment"> */</em>
+<a name="30" href="#30">30</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/ErrorParser.html">ErrorParser</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="31" href="#31">31</a>
+<a name="32" href="#32">32</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="33" href="#33">33</a> InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="34" href="#34">34</a> <strong class="jxr_keyword">throws</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="35" href="#35">35</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a>(<span class="jxr_string">"Parse error"</span>);
+<a name="36" href="#36">36</a> }
+<a name="37" href="#37">37</a>
+<a name="38" href="#38">38</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+
Added: incubator/tika/site/xref/org/apache/tika/parser/Parser.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/Parser.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/Parser.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/Parser.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,63 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>Parser xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/Parser.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a> <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_javadoccomment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_javadoccomment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_javadoccomment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a> <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>
+<a name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="26" href="#26">26</a>
+<a name="27" href="#27">27</a> <em class="jxr_javadoccomment">/**</em>
+<a name="28" href="#28">28</a> <em class="jxr_javadoccomment"> * Tika parser interface</em>
+<a name="29" href="#29">29</a> <em class="jxr_javadoccomment"> */</em>
+<a name="30" href="#30">30</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">interface</strong> <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="31" href="#31">31</a>
+<a name="32" href="#32">32</a> <em class="jxr_javadoccomment">/**</em>
+<a name="33" href="#33">33</a> <em class="jxr_javadoccomment"> * Parses a document stream into a sequence of XHTML SAX events.</em>
+<a name="34" href="#34">34</a> <em class="jxr_javadoccomment"> * Fills in related document metadata in the given metadata object.</em>
+<a name="35" href="#35">35</a> <em class="jxr_javadoccomment"> * &lt;p&gt;</em>
+<a name="36" href="#36">36</a> <em class="jxr_javadoccomment"> * The given document stream is consumed but not closed by this method.</em>
+<a name="37" href="#37">37</a> <em class="jxr_javadoccomment"> * The responsibility to close the stream remains on the caller.</em>
+<a name="38" href="#38">38</a> <em class="jxr_javadoccomment"> *</em>
+<a name="39" href="#39">39</a> <em class="jxr_javadoccomment"> * @param stream the document stream (input)</em>
+<a name="40" href="#40">40</a> <em class="jxr_javadoccomment"> * @param handler handler for the XHTML SAX events (output)</em>
+<a name="41" href="#41">41</a> <em class="jxr_javadoccomment"> * @param metadata document metadata (input and output)</em>
+<a name="42" href="#42">42</a> <em class="jxr_javadoccomment"> * @throws IOException if the document stream could not be read</em>
+<a name="43" href="#43">43</a> <em class="jxr_javadoccomment"> * @throws SAXException if the SAX events could not be processed</em>
+<a name="44" href="#44">44</a> <em class="jxr_javadoccomment"> * @throws TikaException if the document could not be parsed</em>
+<a name="45" href="#45">45</a> <em class="jxr_javadoccomment"> */</em>
+<a name="46" href="#46">46</a> <strong class="jxr_keyword">void</strong> parse(InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="47" href="#47">47</a> <strong class="jxr_keyword">throws</strong> IOException, SAXException, TikaException;
+<a name="48" href="#48">48</a>
+<a name="49" href="#49">49</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+
Added: incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/ParserDecorator.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,74 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>ParserDecorator xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/ParserDecorator.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a> <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_javadoccomment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_javadoccomment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_javadoccomment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a> <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a>
+<a name="22" href="#22">22</a> <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="26" href="#26">26</a>
+<a name="27" href="#27">27</a> <em class="jxr_javadoccomment">/**</em>
+<a name="28" href="#28">28</a> <em class="jxr_javadoccomment"> * Decorator base class for the {@link Parser} interface. This class</em>
+<a name="29" href="#29">29</a> <em class="jxr_javadoccomment"> * simply delegates all parsing calls to an underlying decorated parser</em>
+<a name="30" href="#30">30</a> <em class="jxr_javadoccomment"> * instance. Subclasses can provide extra decoration by overriding the</em>
+<a name="31" href="#31">31</a> <em class="jxr_javadoccomment"> * parse method.</em>
+<a name="32" href="#32">32</a> <em class="jxr_javadoccomment"> */</em>
+<a name="33" href="#33">33</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/ParserDecorator.html">ParserDecorator</a> implements <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> {
+<a name="34" href="#34">34</a>
+<a name="35" href="#35">35</a> <em class="jxr_javadoccomment">/**</em>
+<a name="36" href="#36">36</a> <em class="jxr_javadoccomment"> * The decorated parser instance.</em>
+<a name="37" href="#37">37</a> <em class="jxr_javadoccomment"> */</em>
+<a name="38" href="#38">38</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">final</strong> <a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser;
+<a name="39" href="#39">39</a>
+<a name="40" href="#40">40</a> <em class="jxr_javadoccomment">/**</em>
+<a name="41" href="#41">41</a> <em class="jxr_javadoccomment"> * Creates a decorator for the given parser.</em>
+<a name="42" href="#42">42</a> <em class="jxr_javadoccomment"> *</em>
+<a name="43" href="#43">43</a> <em class="jxr_javadoccomment"> * @param parser the parser instance to be decorated</em>
+<a name="44" href="#44">44</a> <em class="jxr_javadoccomment"> */</em>
+<a name="45" href="#45">45</a> <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/ParserDecorator.html">ParserDecorator</a>(<a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser) {
+<a name="46" href="#46">46</a> <strong class="jxr_keyword">this</strong>.parser = parser;
+<a name="47" href="#47">47</a> }
+<a name="48" href="#48">48</a>
+<a name="49" href="#49">49</a> <em class="jxr_javadoccomment">/**</em>
+<a name="50" href="#50">50</a> <em class="jxr_javadoccomment"> * Delegates the method call to the decorated parser. Subclasses should</em>
+<a name="51" href="#51">51</a> <em class="jxr_javadoccomment"> * override this method (and use <code>super.parse()</code> to invoke</em>
+<a name="52" href="#52">52</a> <em class="jxr_javadoccomment"> * the decorated parser) to implement extra decoration.</em>
+<a name="53" href="#53">53</a> <em class="jxr_javadoccomment"> */</em>
+<a name="54" href="#54">54</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="55" href="#55">55</a> InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="56" href="#56">56</a> <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="57" href="#57">57</a> parser.parse(stream, handler, metadata);
+<a name="58" href="#58">58</a> }
+<a name="59" href="#59">59</a>
+<a name="60" href="#60">60</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+
Added: incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html
URL: http://svn.apache.org/viewvc/incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html?rev=596146&view=auto
==============================================================================
--- incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html (added)
+++ incubator/tika/site/xref/org/apache/tika/parser/ParserPostProcessor.html Sun Nov 18 14:20:54 2007
@@ -0,0 +1,96 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1" />
+<title>ParserPostProcessor xref</title>
+<link type="text/css" rel="stylesheet" href="../../../../stylesheet.css" />
+</head>
+<body>
+<div id="overview"><a href="../../../../../apidocs/org/apache/tika/parser/ParserPostProcessor.html">View Javadoc</a></div><pre>
+
+<a name="1" href="#1">1</a> <em class="jxr_javadoccomment">/**</em>
+<a name="2" href="#2">2</a> <em class="jxr_javadoccomment"> * Licensed to the Apache Software Foundation (ASF) under one or more</em>
+<a name="3" href="#3">3</a> <em class="jxr_javadoccomment"> * contributor license agreements. See the NOTICE file distributed with</em>
+<a name="4" href="#4">4</a> <em class="jxr_javadoccomment"> * this work for additional information regarding copyright ownership.</em>
+<a name="5" href="#5">5</a> <em class="jxr_javadoccomment"> * The ASF licenses this file to You under the Apache License, Version 2.0</em>
+<a name="6" href="#6">6</a> <em class="jxr_javadoccomment"> * (the "License"); you may not use this file except in compliance with</em>
+<a name="7" href="#7">7</a> <em class="jxr_javadoccomment"> * the License. You may obtain a copy of the License at</em>
+<a name="8" href="#8">8</a> <em class="jxr_javadoccomment"> *</em>
+<a name="9" href="#9">9</a> <em class="jxr_javadoccomment"> * <a href="http://www.apache.org/licenses/LICENSE-2.0" target="alexandria_uri">http://www.apache.org/licenses/LICENSE-2.0</a></em>
+<a name="10" href="#10">10</a> <em class="jxr_javadoccomment"> *</em>
+<a name="11" href="#11">11</a> <em class="jxr_javadoccomment"> * Unless required by applicable law or agreed to in writing, software</em>
+<a name="12" href="#12">12</a> <em class="jxr_javadoccomment"> * distributed under the License is distributed on an "AS IS" BASIS,</em>
+<a name="13" href="#13">13</a> <em class="jxr_javadoccomment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</em>
+<a name="14" href="#14">14</a> <em class="jxr_javadoccomment"> * See the License for the specific language governing permissions and</em>
+<a name="15" href="#15">15</a> <em class="jxr_javadoccomment"> * limitations under the License.</em>
+<a name="16" href="#16">16</a> <em class="jxr_javadoccomment"> */</em>
+<a name="17" href="#17">17</a> <strong class="jxr_keyword">package</strong> org.apache.tika.parser;
+<a name="18" href="#18">18</a>
+<a name="19" href="#19">19</a> <strong class="jxr_keyword">import</strong> java.io.IOException;
+<a name="20" href="#20">20</a> <strong class="jxr_keyword">import</strong> java.io.InputStream;
+<a name="21" href="#21">21</a> <strong class="jxr_keyword">import</strong> java.io.StringWriter;
+<a name="22" href="#22">22</a>
+<a name="23" href="#23">23</a> <strong class="jxr_keyword">import</strong> org.apache.oro.text.regex.MalformedPatternException;
+<a name="24" href="#24">24</a> <strong class="jxr_keyword">import</strong> org.apache.tika.exception.TikaException;
+<a name="25" href="#25">25</a> <strong class="jxr_keyword">import</strong> org.apache.tika.metadata.Metadata;
+<a name="26" href="#26">26</a> <strong class="jxr_keyword">import</strong> org.apache.tika.sax.TeeContentHandler;
+<a name="27" href="#27">27</a> <strong class="jxr_keyword">import</strong> org.apache.tika.sax.WriteOutContentHandler;
+<a name="28" href="#28">28</a> <strong class="jxr_keyword">import</strong> org.apache.tika.utils.RegexUtils;
+<a name="29" href="#29">29</a> <strong class="jxr_keyword">import</strong> org.xml.sax.ContentHandler;
+<a name="30" href="#30">30</a> <strong class="jxr_keyword">import</strong> org.xml.sax.SAXException;
+<a name="31" href="#31">31</a>
+<a name="32" href="#32">32</a> <em class="jxr_javadoccomment">/**</em>
+<a name="33" href="#33">33</a> <em class="jxr_javadoccomment"> * Parser decorator that post-processes the results from a decorated parser.</em>
+<a name="34" href="#34">34</a> <em class="jxr_javadoccomment"> * The post-processing fills in the "fulltext" and "summary" metadata entries</em>
+<a name="35" href="#35">35</a> <em class="jxr_javadoccomment"> * from the text content returned by the decorated parser, and adds every link</em>
+<a name="36" href="#36">36</a> <em class="jxr_javadoccomment"> * found in that text as an "outlinks" entry. Exceptions thrown by the</em>
+<a name="37" href="#37">37</a> <em class="jxr_javadoccomment"> * decorated parser are not caught; they propagate to the caller.</em>
+<a name="38" href="#38">38</a> <em class="jxr_javadoccomment"> */</em>
+<a name="39" href="#39">39</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">class</strong> <a href="../../../../org/apache/tika/parser/ParserPostProcessor.html">ParserPostProcessor</a> <strong class="jxr_keyword">extends</strong> <a href="../../../../org/apache/tika/parser/ParserDecorator.html">ParserDecorator</a> {
+<a name="40" href="#40">40</a>
+<a name="41" href="#41">41</a> <strong class="jxr_keyword">private</strong> <strong class="jxr_keyword">static</strong> <strong class="jxr_keyword">final</strong> String LINK_PATTERN =
+<a name="42" href="#42">42</a> <span class="jxr_string">"([A-Za-z][A-Za-z0-9+.-]{1,120}:"</span>
+<a name="43" href="#43">43</a> + <span class="jxr_string">"[A-Za-z0-9/](([A-Za-z0-9$_.+!*,;/?:@&amp;~=-])|%[A-Fa-f0-9]{2}){1,333}"</span>
+<a name="44" href="#44">44</a> + <span class="jxr_string">"(#([a-zA-Z0-9][a-zA-Z0-9$_.+!*,;/?:@&amp;~=%-]{0,1000}))?)"</span>;
+<a name="45" href="#45">45</a>
+<a name="46" href="#46">46</a> <em class="jxr_javadoccomment">/**</em>
+<a name="47" href="#47">47</a> <em class="jxr_javadoccomment"> * Creates a post-processing decorator for the given parser.</em>
+<a name="48" href="#48">48</a> <em class="jxr_javadoccomment"> *</em>
+<a name="49" href="#49">49</a> <em class="jxr_javadoccomment"> * @param parser the parser to be decorated</em>
+<a name="50" href="#50">50</a> <em class="jxr_javadoccomment"> */</em>
+<a name="51" href="#51">51</a> <strong class="jxr_keyword">public</strong> <a href="../../../../org/apache/tika/parser/ParserPostProcessor.html">ParserPostProcessor</a>(<a href="../../../../org/apache/tika/parser/Parser.html">Parser</a> parser) {
+<a name="52" href="#52">52</a> <strong class="jxr_keyword">super</strong>(parser);
+<a name="53" href="#53">53</a> }
+<a name="54" href="#54">54</a>
+<a name="55" href="#55">55</a> <em class="jxr_javadoccomment">/**</em>
+<a name="56" href="#56">56</a> <em class="jxr_javadoccomment"> * Forwards the call to the delegated parser and post-processes the</em>
+<a name="57" href="#57">57</a> <em class="jxr_javadoccomment"> * results as described above.</em>
+<a name="58" href="#58">58</a> <em class="jxr_javadoccomment"> */</em>
+<a name="59" href="#59">59</a> <strong class="jxr_keyword">public</strong> <strong class="jxr_keyword">void</strong> parse(
+<a name="60" href="#60">60</a> InputStream stream, ContentHandler handler, <a href="../../../../org/apache/tika/metadata/Metadata.html">Metadata</a> metadata)
+<a name="61" href="#61">61</a> <strong class="jxr_keyword">throws</strong> IOException, SAXException, <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a> {
+<a name="62" href="#62">62</a> StringWriter writer = <strong class="jxr_keyword">new</strong> StringWriter();
+<a name="63" href="#63">63</a> handler = <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/sax/TeeContentHandler.html">TeeContentHandler</a>(
+<a name="64" href="#64">64</a> handler, <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/sax/WriteOutContentHandler.html">WriteOutContentHandler</a>(writer));
+<a name="65" href="#65">65</a> <strong class="jxr_keyword">super</strong>.parse(stream, handler, metadata);
+<a name="66" href="#66">66</a>
+<a name="67" href="#67">67</a> String content = writer.toString();
+<a name="68" href="#68">68</a> metadata.set(<span class="jxr_string">"fulltext"</span>, content);
+<a name="69" href="#69">69</a>
+<a name="70" href="#70">70</a> <strong class="jxr_keyword">int</strong> length = Math.min(content.length(), 500);
+<a name="71" href="#71">71</a> metadata.set(<span class="jxr_string">"summary"</span>, content.substring(0, length));
+<a name="72" href="#72">72</a>
+<a name="73" href="#73">73</a> <strong class="jxr_keyword">try</strong> {
+<a name="74" href="#74">74</a> <strong class="jxr_keyword">for</strong> (String link : RegexUtils.extract(content, LINK_PATTERN)) {
+<a name="75" href="#75">75</a> metadata.add(<span class="jxr_string">"outlinks"</span>, link);
+<a name="76" href="#76">76</a> }
+<a name="77" href="#77">77</a> } <strong class="jxr_keyword">catch</strong> (MalformedPatternException e) {
+<a name="78" href="#78">78</a> <strong class="jxr_keyword">throw</strong> <strong class="jxr_keyword">new</strong> <a href="../../../../org/apache/tika/exception/TikaException.html">TikaException</a>(<span class="jxr_string">"Malformed URL pattern"</span>, e);
+<a name="79" href="#79">79</a> }
+<a name="80" href="#80">80</a> }
+<a name="81" href="#81">81</a>
+<a name="82" href="#82">82</a> }
+</pre>
+<hr/><div id="footer">This page was automatically generated by <a href="http://maven.apache.org/">Maven</a></div></body>
+</html>
+