You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2010/02/12 07:59:42 UTC

svn commit: r909269 - in /lucene/nutch/trunk/src/plugin/parse-tika: lib/ sample/

Author: mattmann
Date: Fri Feb 12 06:59:40 2010
New Revision: 909269

URL: http://svn.apache.org/viewvc?rev=909269&view=rev
Log:
- 2nd part of NUTCH-766 Tika parser

Added:
    lucene/nutch/trunk/src/plugin/parse-tika/lib/
    lucene/nutch/trunk/src/plugin/parse-tika/lib/asm-3.1.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/bcmail-jdk14-136.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/bcprov-jdk14-136.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-compress-1.0.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-logging-1.1.1.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/dom4j-1.6.1.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/fontbox-0.8.0-incubator.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/geronimo-stax-api_1.0_spec-1.0.1.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/jempbox-0.8.0-incubator.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/metadata-extractor-2.4.0-beta-1.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/pdfbox-0.8.0-incubating.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-3.6.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-3.6.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-schemas-3.6.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-scratchpad-3.6.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/tagsoup-1.2.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/tika-parsers-0.6.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/xml-apis-1.0.b2.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/lib/xmlbeans-2.3.0.jar   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/sample/
    lucene/nutch/trunk/src/plugin/parse-tika/sample/encrypted.pdf   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/sample/nutch.html
    lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.odt   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.sxw   (with props)
    lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.txt
    lucene/nutch/trunk/src/plugin/parse-tika/sample/pdftest.pdf
    lucene/nutch/trunk/src/plugin/parse-tika/sample/test.rtf
    lucene/nutch/trunk/src/plugin/parse-tika/sample/word97.doc   (with props)

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/asm-3.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/asm-3.1.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/asm-3.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/bcmail-jdk14-136.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/bcmail-jdk14-136.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/bcmail-jdk14-136.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/bcprov-jdk14-136.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/bcprov-jdk14-136.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/bcprov-jdk14-136.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-compress-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-compress-1.0.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-compress-1.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-logging-1.1.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-logging-1.1.1.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/commons-logging-1.1.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/dom4j-1.6.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/dom4j-1.6.1.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/dom4j-1.6.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/fontbox-0.8.0-incubator.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/fontbox-0.8.0-incubator.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/fontbox-0.8.0-incubator.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/geronimo-stax-api_1.0_spec-1.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/geronimo-stax-api_1.0_spec-1.0.1.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/geronimo-stax-api_1.0_spec-1.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/jempbox-0.8.0-incubator.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/jempbox-0.8.0-incubator.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/jempbox-0.8.0-incubator.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/metadata-extractor-2.4.0-beta-1.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/metadata-extractor-2.4.0-beta-1.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/metadata-extractor-2.4.0-beta-1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/pdfbox-0.8.0-incubating.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/pdfbox-0.8.0-incubating.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/pdfbox-0.8.0-incubating.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-3.6.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-3.6.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-3.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-3.6.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-3.6.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-3.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-schemas-3.6.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-schemas-3.6.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-ooxml-schemas-3.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-scratchpad-3.6.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-scratchpad-3.6.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/poi-scratchpad-3.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/tagsoup-1.2.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/tagsoup-1.2.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/tagsoup-1.2.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/tika-parsers-0.6.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/tika-parsers-0.6.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/tika-parsers-0.6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/xml-apis-1.0.b2.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/xml-apis-1.0.b2.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/xml-apis-1.0.b2.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/lib/xmlbeans-2.3.0.jar
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/lib/xmlbeans-2.3.0.jar?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/lib/xmlbeans-2.3.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/encrypted.pdf
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/encrypted.pdf?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/sample/encrypted.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/nutch.html
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/nutch.html?rev=909269&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-tika/sample/nutch.html (added)
+++ lucene/nutch/trunk/src/plugin/parse-tika/sample/nutch.html Fri Feb 12 06:59:40 2010
@@ -0,0 +1,519 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html>
+<head>
+<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+<meta content="Apache Forrest" name="Generator">
+<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-skin-name" content="lucene">
+<title>Welcome to Nutch!</title>
+<link type="text/css" href="skin/basic.css" rel="stylesheet">
+<link media="screen" type="text/css" href="skin/screen.css" rel="stylesheet">
+<link media="print" type="text/css" href="skin/print.css" rel="stylesheet">
+<link type="text/css" href="skin/profile.css" rel="stylesheet">
+<script src="skin/getBlank.js" language="javascript" type="text/javascript"></script><script src="skin/getMenu.js" language="javascript" type="text/javascript"></script><script src="skin/fontsize.js" language="javascript" type="text/javascript"></script>
+<link rel="shortcut icon" href="images/favicon.ico">
+</head>
+<body onload="init()">
+<script type="text/javascript">ndeSetTextSize();</script>
+<div id="top">
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+<a href="http://www.apache.org/">Apache</a> &gt; <a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+</div>
+<!--+
+    |header
+    +-->
+<div class="header">
+<!--+
+    |start group logo
+    +-->
+<div class="grouplogo">
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="images/lucene_green_150.gif" title="Apache Lucene"></a>
+</div>
+<!--+
+    |end group logo
+    +-->
+<!--+
+    |start Project Logo
+    +-->
+<div class="projectlogo">
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+</div>
+<!--+
+    |end Project Logo
+    +-->
+<!--+
+    |start Search
+    +-->
+<div class="searchbox">
+<form action="http://search.lucidimagination.com/p:nutch" method="get" class="roundtopsmall">
+<input onFocus="getBlank (this, 'Search the site with Solr');" size="25" name="q" id="query" type="text" value="Search the site with Solr">&nbsp; 
+                    <input name="Search" value="Search" type="submit">
+</form>
+<div style="position: relative; top: -5px; left: -10px">Powered by <a href="http://www.lucidimagination.com" style="color: #033268">Lucid Imagination</a>
+</div>
+</div>
+<!--+
+    |end search
+    +-->
+<!--+
+    |start Tabs
+    +-->
+<ul id="tabs">
+<li class="current">
+<a class="selected" href="index.html">Main</a>
+</li>
+<li>
+<a class="unselected" href="http://wiki.apache.org/nutch/">Wiki</a>
+</li>
+<li>
+<a class="unselected" href="http://issues.apache.org/jira/browse/Nutch">Jira</a>
+</li>
+</ul>
+<!--+
+    |end Tabs
+    +-->
+</div>
+</div>
+<div id="main">
+<div id="publishedStrip">
+<!--+
+    |start Subtabs
+    +-->
+<div id="level2tabs"></div>
+<!--+
+    |end Endtabs
+    +-->
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<!--+
+    |breadtrail
+    +-->
+<div class="breadtrail">
+
+             &nbsp;
+           </div>
+<!--+
+    |start Menu, mainarea
+    +-->
+<!--+
+    |start Menu
+    +-->
+<div id="menu">
+<div onclick="SwitchMenu('menu_selected_1.1', 'skin/')" id="menu_selected_1.1Title" class="menutitle" style="background-image: url('skin/images/chapter_open.gif');">Project</div>
+<div id="menu_selected_1.1" class="selectedmenuitemgroup" style="display: block;">
+<div class="menupage">
+<div class="menupagetitle">News</div>
+</div>
+<div class="menuitem">
+<a href="about.html">About</a>
+</div>
+<div class="menuitem">
+<a href="credits.html">Credits</a>
+</div>
+<div class="menuitem">
+<a href="http://www.cafepress.com/nutch/">Buy Stuff</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.2', 'skin/')" id="menu_1.2Title" class="menutitle">Documentation</div>
+<div id="menu_1.2" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://wiki.apache.org/nutch/FAQ">FAQ</a>
+</div>
+<div class="menuitem">
+<a href="http://wiki.apache.org/nutch/">Wiki</a>
+</div>
+<div class="menuitem">
+<a href="tutorial.html">Tutorial (0.7.2)</a>
+</div>
+<div class="menuitem">
+<a href="tutorial8.html">Tutorial (0.8.x)</a>
+</div>
+<div class="menuitem">
+<a href="bot.html">Robot     </a>
+</div>
+<div class="menuitem">
+<a href="i18n.html">i18n</a>
+</div>
+<div class="menuitem">
+<a href="apidocs-1.0/index.html">API Docs (1.0)</a>
+</div>
+<div class="menuitem">
+<a href="apidocs-0.9/index.html">API Docs (0.9)</a>
+</div>
+<div class="menuitem">
+<a href="apidocs-0.8.x/index.html">API Docs (0.8.x)</a>
+</div>
+<div class="menuitem">
+<a href="apidocs/index.html">API Docs (0.7.2)</a>
+</div>
+<div class="menuitem">
+<a href="http://lucene.zones.apache.org:8080/hudson/job/Nutch-Nightly/ws/trunk/build/docs/api/index.html">API Docs (nightly)</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.3', 'skin/')" id="menu_1.3Title" class="menutitle">Resources</div>
+<div id="menu_1.3" class="menuitemgroup">
+<div class="menuitem">
+<a href="release/">Download</a>
+</div>
+<div class="menuitem">
+<a href="nightly.html">Nightly builds</a>
+</div>
+<div class="menuitem">
+<a href="mailing_lists.html">Mailing Lists</a>
+</div>
+<div class="menuitem">
+<a href="issue_tracking.html">Issue Tracking</a>
+</div>
+<div class="menuitem">
+<a href="version_control.html">Version Control</a>
+</div>
+</div>
+<div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
+<div id="menu_1.4" class="menuitemgroup">
+<div class="menuitem">
+<a href="http://lucene.apache.org/java/">Lucene Java</a>
+</div>
+<div class="menuitem">
+<a href="http://lucene.apache.org/hadoop/">Hadoop</a>
+</div>
+<div class="menuitem">
+<a href="http://incubator.apache.org/solr/">Solr</a>
+</div>
+</div>
+<div id="credit">
+<hr>
+<a href="http://forrest.apache.org/"><img border="0" title="Built with Apache Forrest" alt="Built with Apache Forrest - logo" src="images/built-with-forrest-button.png" style="width: 88px;height: 31px;"></a>
+</div>
+<div id="roundbottom">
+<img style="display: none" class="corner" height="15" width="15" alt="" src="skin/images/rc-b-l-15-1body-2menu-3menu.png"></div>
+<!--+
+  |alternative credits
+  +-->
+<div id="credit2"></div>
+</div>
+<!--+
+    |end Menu
+    +-->
+<!--+
+    |start content
+    +-->
+<div id="content">
+<div title="Portable Document Format" class="pdflink">
+<a class="dida" href="index.pdf"><img alt="PDF -icon" src="skin/images/pdfdoc.gif" class="skin"><br>
+        PDF</a>
+</div>
+<h1>Welcome to Nutch!</h1>
+<div id="minitoc-area">
+<ul class="minitoc">
+<li>
+<a href="#News">News</a>
+<ul class="minitoc">
+<li>
+<a href="#14+August+2009+-+Lucene+at+US+ApacheCon">14 August 2009 - Lucene at US ApacheCon</a>
+</li>
+<li>
+<a href="#23+March+2009+-+Apache+Nutch+1.0+Released">23 March 2009 - Apache Nutch 1.0 Released</a>
+</li>
+<li>
+<a href="#09+February+2009+-+Lucene+at+ApacheCon+Europe+2009+in%0A%09%09%09Amsterdam">09 February 2009 - Lucene at ApacheCon Europe 2009 in
+			Amsterdam</a>
+</li>
+<li>
+<a href="#2+April+2007%3A+Nutch+0.9+Released">2 April 2007: Nutch 0.9 Released</a>
+</li>
+<li>
+<a href="#24+September+2006%3A+Nutch+0.8.1+Released">24 September 2006: Nutch 0.8.1 Released</a>
+</li>
+<li>
+<a href="#25+July+2006%3A+Nutch+0.8+Released">25 July 2006: Nutch 0.8 Released</a>
+</li>
+<li>
+<a href="#31+March+2006%3A+Nutch+0.7.2+Released">31 March 2006: Nutch 0.7.2 Released</a>
+</li>
+<li>
+<a href="#1+October+2005%3A+Nutch+0.7.1+Released">1 October 2005: Nutch 0.7.1 Released</a>
+</li>
+<li>
+<a href="#17+August+2005%3A+Nutch+0.7+Released">17 August 2005: Nutch 0.7 Released</a>
+</li>
+<li>
+<a href="#June+2005%3A+Nutch+graduates+from+Incubator">June 2005: Nutch graduates from Incubator</a>
+</li>
+<li>
+<a href="#January+2005%3A+Nutch+Joins+Apache+Incubator">January 2005: Nutch Joins Apache Incubator</a>
+</li>
+<li>
+<a href="#September+2004%3A+Creative+Commons+launches+Nutch-based+Search">September 2004: Creative Commons launches Nutch-based Search</a>
+</li>
+<li>
+<a href="#September+2004%3A+Oregon+State+University+switches+to+Nutch">September 2004: Oregon State University switches to Nutch</a>
+</li>
+</ul>
+</li>
+</ul>
+</div> 
+
+    
+<a name="N1000D"></a><a name="News"></a>
+<h2 class="h3">News</h2>
+<div class="section">
+<a name="N10013"></a><a name="14+August+2009+-+Lucene+at+US+ApacheCon"></a>
+<h3 class="h4">14 August 2009 - Lucene at US ApacheCon</h3>
+<p>
+        
+<a href="http://www.us.apachecon.com/c/acus2009/" title="ApacheCon US 2009">
+            <img alt="ApacheCon Logo" class="float-right" src="http://www.apache.org/events/current-event-125x125.png">
+        </a>
+        ApacheCon US is once again in the Bay Area and Lucene is coming
+        along for the ride! The Lucene community has planned two full
+        days of talks, plus a meetup and the usual bevy of training.
+        With a well-balanced mix of first time and veteran ApacheCon
+        speakers, the
+        <a href="http://www.us.apachecon.com/c/acus2009/schedule#lucene">Lucene track</a>
+        at ApacheCon US promises to have something for everyone. Be sure
+        not to miss:
+    </p>
+<p> Training:</p>
+<ul>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/437">Lucene Boot Camp</a>
+            - A two day training session, Nov. 2nd &amp; 3rd
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/375">Solr Day</a>
+            - A one day training session, Nov. 2nd
+        </li>
+    
+</ul>
+<p>Thursday, Nov. 5th</p>
+<ul>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/428">Introduction to the Lucene Ecosystem
+            </a>
+            - Grant Ingersoll @ 9:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/461">Lucene Basics and New Features</a>
+            - Michael Busch @ 10:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/331">Apache Solr: Out of the Box</a>
+            - Chris Hostetter @ 14:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/427">Introduction to Nutch</a>
+            - Andrzej Bialecki @ 15:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/430">Lucene and Solr Performance Tuning</a>
+            - Mark Miller @ 16:30
+        </li>
+    
+</ul>
+<p>Friday, Nov. 6th</p>
+<ul>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/332">Implementing an Information Retrieval
+                Framework for an Organizational Repository</a>
+            - Sithu D Sudarsan @ 9:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/333">Apache Mahout - Going from raw data to
+                Information</a>
+            - Isabel Drost @ 10:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/334">MIME Magic with Apache Tika</a>
+            - Jukka Zitting @ 11:30
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/335">Building Intelligent Search Applications
+                with the Lucene Ecosystem</a>
+            - Ted Dunning @ 14:00
+        </li>
+        
+<li>
+            
+<a href="http://www.us.apachecon.com/c/acus2009/sessions/462">Realtime Search</a>
+            - Jason Rutherglen @ 15:00
+        </li>
+    
+</ul>
+<a name="N10091"></a><a name="23+March+2009+-+Apache+Nutch+1.0+Released"></a>
+<h3 class="h4">23 March 2009 - Apache Nutch 1.0 Released</h3>
+<p>The 1.0 release of Nutch is now available. This release includes several major feature improvements
+      such as new indexing framework, new scoring framework, Apache Solr integration just to mention a few.
+      See <a href="http://www.apache.org/dist/lucene/nutch/CHANGES-1.0.txt">
+      list of changes</a>  made in this version. The release is available
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N100A3"></a><a name="09+February+2009+-+Lucene+at+ApacheCon+Europe+2009+in%0A%09%09%09Amsterdam"></a>
+<h3 class="h4">09 February 2009 - Lucene at ApacheCon Europe 2009 in
+			Amsterdam</h3>
+<p>
+			
+<a href="http://www.eu.apachecon.com/c/aceu2009/" title="ApacheCon EU 2009">
+				<img alt="ApacheCon EU 2009 Logo" class="float-right" src="http://www.eu.apachecon.com/page_attachments/0000/0115/125x125_basic.gif">
+			</a>
+
+			Lucene will be extremely well represented at
+			<a href="http://www.eu.apachecon.com/c/aceu2009/">ApacheCon EU 2009</a>
+			in Amsterdam, Netherlands this March 23-27, 2009:
+		</p>
+<ul>
+			
+<li>
+				
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/197">Lucene Boot Camp</a>
+				- A two day training session, March 23 &amp; 24th</li>
+                
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/201">Solr Boot Camp</a> - A one day training session, March 24th</li>
+                
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/136">Introducing Apache Mahout</a> - Grant Ingersoll. March 25th @ 10:30</li>
+                
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/137">Lucene/Solr Case Studies</a> - Erik Hatcher. March 25th @ 11:30</li>
+                
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/138">Advanced Indexing Techniques with Apache Lucene</a> - Michael Busch. March 25th @ 14:00</li>  
+                   
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/251">Apache Solr - A Case Study</a> - Uri Boness. March 26th @ 17:30</li>
+           
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/250">Best of breed - httpd, forrest, solr and droids</a> - Thorsten Scherler. March 27th @ 17:30</li>
+           
+<li>
+<a href="http://eu.apachecon.com/c/aceu2009/sessions/165">Apache Droids - an intelligent standalone robot framework</a> - Thorsten Scherler. March 26th @ 15:00</li>
+
+               
+</ul>
+<a name="N100EF"></a><a name="2+April+2007%3A+Nutch+0.9+Released"></a>
+<h3 class="h4">2 April 2007: Nutch 0.9 Released</h3>
+<p>The 0.9 release of Nutch is now available. This is the second release of Nutch
+      based entirely on the underlying Hadoop platform. This release includes several critical
+      bug fixes, as well as key speedups described in more detail at 
+      <a href="http://blog.foofactory.fi/2007/03/twice-speed-half-size.html">Sami Siren's blog</a>.
+      See <a href="http://www.apache.org/dist/lucene/nutch/CHANGES-0.9.txt">
+      list of changes</a>  made in this version. The release is available
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N10105"></a><a name="24+September+2006%3A+Nutch+0.8.1+Released"></a>
+<h3 class="h4">24 September 2006: Nutch 0.8.1 Released</h3>
+<p>The 0.8.1 release of Nutch is now available. This is a maintenance release to the 0.8 branch fixing many serious bugs found in version 0.8.
+      See <a href="http://www.apache.org/dist/lucene/nutch/CHANGES-0.8.1.txt">
+      list of changes</a>  made in this version. The release is available
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N10117"></a><a name="25+July+2006%3A+Nutch+0.8+Released"></a>
+<h3 class="h4">25 July 2006: Nutch 0.8 Released</h3>
+<p>The 0.8 release of Nutch is now available. This is the first release of Nutch based on
+      hadoop architecture. See <a href="http://svn.apache.org/viewvc/lucene/nutch/tags/release-0.8/CHANGES.txt?view=markup">
+      CHANGES.txt</a> for list of changes made in this version. The release is available
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N10129"></a><a name="31+March+2006%3A+Nutch+0.7.2+Released"></a>
+<h3 class="h4">31 March 2006: Nutch 0.7.2 Released</h3>
+<p>The 0.7.2 release of Nutch is now available. This is a bug fix release for 0.7 branch. See
+      <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/branches/branch-0.7/CHANGES.txt?rev=390158">
+      CHANGES.txt</a> for details. The release is available
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N1013B"></a><a name="1+October+2005%3A+Nutch+0.7.1+Released"></a>
+<h3 class="h4">1 October 2005: Nutch 0.7.1 Released</h3>
+<p>The 0.7.1 release of Nutch is now available. This is a bug fix release. See
+      <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/branches/branch-0.7/CHANGES.txt?rev=292986">
+      CHANGES.txt</a> for details. The release is available
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N1014D"></a><a name="17+August+2005%3A+Nutch+0.7+Released"></a>
+<h3 class="h4">17 August 2005: Nutch 0.7 Released</h3>
+<p>This is the first Nutch release as an Apache Lucene sub-project. See 
+      <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/trunk/CHANGES.txt?rev=233150">
+      CHANGES.txt</a> for details. The release is available 
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+<a name="N1015F"></a><a name="June+2005%3A+Nutch+graduates+from+Incubator"></a>
+<h3 class="h4">June 2005: Nutch graduates from Incubator</h3>
+<p>Nutch has now graduated from the Apache incubator, and is now
+      a Subproject of Lucene.</p>
+<a name="N10169"></a><a name="January+2005%3A+Nutch+Joins+Apache+Incubator"></a>
+<h3 class="h4">January 2005: Nutch Joins Apache Incubator</h3>
+<p>Nutch is a two-year-old open source project, previously
+        hosted at Sourceforge and backed by its own non-profit
+        organization. The non-profit was founded in order to assign
+        copyright, so that we could retain the right to change the
+        license. We have now determined that the Apache license is the
+        appropriate license for Nutch and no longer require the
+        overhead of an independent non-profit organization. Nutch's
+        board of directors and its developers were both polled and
+        supported the move to the Apache foundation.</p>
+<a name="N10173"></a><a name="September+2004%3A+Creative+Commons+launches+Nutch-based+Search"></a>
+<h3 class="h4">September 2004: Creative Commons launches Nutch-based Search</h3>
+<p>Creative Commons unveiled a beta version of its search
+      engine, which scours the web for text, images, audio, and video
+      free to re-use on certain terms &mdash; a search refinement offered by
+      no other company or organization.</p>
+<p>See the <a href="http://creativecommons.org/press-releases/entry/5064">Creative
+      Commons Press Release</a> for more details.</p>
+<a name="N10184"></a><a name="September+2004%3A+Oregon+State+University+switches+to+Nutch"></a>
+<h3 class="h4">September 2004: Oregon State University switches to Nutch</h3>
+<p>Oregon State University is converting its searching
+      infrastructure from Google&trade; to the open source project
+      Nutch. The effort to replace Google&trade; will realize
+      significant cost savings for Oregon State University, while
+      promoting both the Nutch Search Engine and transparency in
+      search engine use and management.</p>
+<p>For more details see the announcement by OSU's <a href="http://osuosl.org/news_folder/nutch">Open Source
+      Lab</a>.</p>
+</div>
+
+  
+</div>
+<!--+
+    |end content
+    +-->
+<div class="clearboth">&nbsp;</div>
+</div>
+<div id="footer">
+<!--+
+    |start bottomstrip
+    +-->
+<div class="lastmodified">
+<script type="text/javascript"><!--
+document.write("Last Published: " + document.lastModified);
+//  --></script>
+</div>
+<div class="copyright">
+        Copyright &copy;
+         2006 <a href="http://www.apache.org/licenses/">The Apache Software Foundation.</a>
+</div>
+<div id="logos"></div>
+<!--+
+    |end bottomstrip
+    +-->
+</div>
+</body>
+</html>

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.odt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.odt?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.odt
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.sxw
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.sxw?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.sxw
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.txt?rev=909269&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.txt (added)
+++ lucene/nutch/trunk/src/plugin/parse-tika/sample/ootest.txt Fri Feb 12 06:59:40 2010
@@ -0,0 +1,30 @@
+Abcedfg				?????
+Abcdefg
+Abcdefg
+abcdefg
+
+
+
+
+
+
+
+
+
+
+ http://www.openoffice.org
+
+Title
+Col1
+Col2
+Col3
+head
+Cell1
+Cell2
+Cel3
+total
+TOTAL
+TOTAL
+TOTAL
+
+Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Integer a leo in lacus malesuada ornare. Mauris sagittis. Nam vestibulum. Nunc gravida vestibulum augue. Praesent sed lectus quis lectus adipiscing bibendum. Sed nulla. Duis posuere justo eget urna. Proin lorem orci, vestibulum ut, consequat molestie, eleifend a, nibh. Mauris sed lacus. Etiam blandit tincidunt neque. Cras ac sapien. Duis erat. 

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/pdftest.pdf
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/pdftest.pdf?rev=909269&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-tika/sample/pdftest.pdf (added)
+++ lucene/nutch/trunk/src/plugin/parse-tika/sample/pdftest.pdf Fri Feb 12 06:59:40 2010
@@ -0,0 +1,157 @@
+%PDF-1.2 
+%âãÏÓ
+ 
+9 0 obj
+<<
+/Length 10 0 R
+/Filter /FlateDecode 
+>>
+stream
+H‰ÍÑJÃ0†Ÿ ïð{§²fç$M“ínÒ-‚[&jeŠâÛÛ¤ñ~‚$ÉÉÿ}ÉɅ¬Ij«¬ÌsÀ—‚Ç~€XÖ-],÷‚$Y—÷Ó)ü'N«u­1!œ„ÀVÙ?ŸÁ?
+žb1RbbœÒ‰ÉH²[¹™TD:#ž&Ø­ÙÌX®¦øiç»$qnf¬ƒ¿†¶]»ÀõËîãaÿ¶{ÿÂØ£‰›×q|JªLs]™QÒI¸¬jî„%¯Œ9Øé`ß঺¼ÅU»itezÛ$›’Ú¿OeBÆĒүá¸Råþ@zܗúóÿgª¼ø<õ¡ª
+endstream
+endobj
+10 0 obj
+246
+endobj
+4 0 obj
+<<
+/Type /Page
+/Parent 5 0 R
+/Resources <<
+/Font <<
+/F0 6 0 R 
+/F1 7 0 R 
+>>
+/ProcSet 2 0 R
+>>
+/Contents 9 0 R
+>>
+endobj
+6 0 obj
+<<
+/Type /Font
+/Subtype /TrueType
+/Name /F0
+/BaseFont /Arial
+/Encoding /WinAnsiEncoding
+>>
+endobj
+7 0 obj
+<<
+/Type /Font
+/Subtype /TrueType
+/Name /F1
+/BaseFont /BookAntiqua,Bold
+/FirstChar 31
+/LastChar 255
+/Widths [ 750 250 278 402 606 500 889 833 227 333 333 444 606 250 333 250 
+296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 
+444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833 
+833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606 
+500 333 500 611 444 611 500 389 556 611 333 333 611 333 889 611 
+556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606 
+750 500 750 333 500 500 1000 500 500 333 1000 611 389 1000 750 750 
+750 750 278 278 500 500 606 500 1000 333 998 444 389 833 750 750 
+667 250 278 500 500 606 500 606 500 333 747 438 500 606 333 747 
+500 400 549 361 361 333 576 641 250 333 361 488 500 889 890 889 
+444 778 778 778 778 778 778 1000 722 611 611 611 611 389 389 389 
+389 833 833 833 833 833 833 833 606 833 778 778 778 778 667 611 
+611 500 500 500 500 500 500 778 444 500 500 500 500 333 333 333 
+333 556 611 556 556 556 556 556 549 556 611 611 611 611 556 611 
+556 ]
+/Encoding /WinAnsiEncoding
+/FontDescriptor 8 0 R
+>>
+endobj
+8 0 obj
+<<
+/Type /FontDescriptor
+/FontName /BookAntiqua,Bold
+/Flags 16418
+/FontBBox [ -250 -260 1236 930 ]
+/MissingWidth 750
+/StemV 146
+/StemH 146
+/ItalicAngle 0
+/CapHeight 930
+/XHeight 651
+/Ascent 930
+/Descent 260
+/Leading 210
+/MaxWidth 1030
+/AvgWidth 460
+>>
+endobj
+2 0 obj
+[ /PDF /Text  ]
+endobj
+5 0 obj
+<<
+/Kids [4 0 R ]
+/Count 1
+/Type /Pages
+/MediaBox [ 0 0 612 792 ]
+>>
+endobj
+1 0 obj
+<<
+/Creator (1725.fm)
+/CreationDate (1-Jan-3 18:15PM)
+/Title (1725.PDF)
+/Author (Unknown)
+/Producer (Acrobat PDFWriter 3.02 for Windows)
+/Keywords ()
+/Subject ()
+>>
+endobj
+3 0 obj
+<<
+/Pages 5 0 R
+/Type /Catalog
+/DefaultGray 11 0 R
+/DefaultRGB  12 0 R
+>>
+endobj
+11 0 obj
+[/CalGray
+<<
+/WhitePoint [0.9505 1 1.0891 ]
+/Gamma 0.2468 
+>>
+]
+endobj
+12 0 obj
+[/CalRGB
+<<
+/WhitePoint [0.9505 1 1.0891 ]
+/Gamma [0.2468 0.2468 0.2468 ]
+/Matrix [0.4361 0.2225 0.0139 0.3851 0.7169 0.0971 0.1431 0.0606 0.7141 ]
+>>
+]
+endobj
+xref
+0 13
+0000000000 65535 f
+0000002172 00000 n
+0000002046 00000 n
+0000002363 00000 n
+0000000375 00000 n
+0000002080 00000 n
+0000000518 00000 n
+0000000633 00000 n
+0000001760 00000 n
+0000000021 00000 n
+0000000352 00000 n
+0000002460 00000 n
+0000002548 00000 n
+trailer
+<<
+/Size 13
+/Root 3 0 R
+/Info 1 0 R
+/ID [<47149510433dd4882f05f8c124223734><47149510433dd4882f05f8c124223734>]
+>>
+startxref
+2726
+%%EOF

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/test.rtf
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/test.rtf?rev=909269&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-tika/sample/test.rtf (added)
+++ lucene/nutch/trunk/src/plugin/parse-tika/sample/test.rtf Fri Feb 12 06:59:40 2010
@@ -0,0 +1,17 @@
+{\rtf1\ansi\deff1\adeflang1025
+{\fonttbl{\f0\froman\fprq2\fcharset0 Times;}{\f1\froman\fprq2\fcharset0 Times New Roman;}{\f2\fmodern\fprq1\fcharset0 Courier New;}{\f3\froman\fprq2\fcharset0 Times New Roman;}{\f4\fnil\fprq2\fcharset0 Interface User;}{\f5\fnil\fprq2\fcharset0 Lucidasans;}{\f6\fnil\fprq0\fcharset0 Lucidasans;}}
+{\colortbl;\red0\green0\blue0;\red0\green0\blue128;\red128\green128\blue128;}
+{\stylesheet{\s1\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af5\afs24\lang255\ltrch\dbch\af4\afs24\langfe255\loch\f1\fs24\lang1033\snext1 Default;}
+{\s2\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\afs24\lang255\ltrch\dbch\afs24\langfe255\loch\f1\fs24\lang1033\sbasedon1\snext2 Text body;}
+{\s3\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af6\afs24\lang255\ltrch\dbch\af1\afs24\langfe255\loch\f1\fs24\lang1033\sbasedon2\snext3 List;}
+{\s4\sb120\sa120\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af6\afs20\lang255\ai\ltrch\dbch\afs20\langfe255\ai\loch\f1\fs20\lang1033\i\sbasedon1\snext4 Caption;}
+{\s5\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\rtlch\af6\afs24\lang255\ltrch\dbch\afs24\langfe255\loch\f1\fs24\lang1033\sbasedon1\snext5 Index;}
+{\*\cs7\cf0\rtlch\af2\afs24\lang255\ltrch\dbch\af2\afs24\langfe255\loch\f2\fs24\lang1033 Teletype;}
+{\*\cs8\cf2\ul\rtlch\afs24\lang255\ltrch\dbch\afs24\langfe255\loch\fs24\lang1033 Internet Link;}
+}
+{\info{\title test rft document}{\subject tests}{\creatim\yr2004\mo9\dy20\hr19\min36}{\revtim\yr1601\mo1\dy1\hr0\min0}{\printim\yr1601\mo1\dy1\hr0\min0}{\comment StarWriter}{\vern6450}}\deftab709
+{\*\pgdsctbl
+{\pgdsc0\pgdscuse195\pgwsxn11905\pghsxn16837\marglsxn1800\margrsxn1800\margtsxn1440\margbsxn1440\pgdscnxt0 Default;}}
+{\*\pgdscno0}\paperh16837\paperw11905\margl1800\margr1800\margt1440\margb1440\sectd\sbknone\pgwsxn11905\pghsxn16837\marglsxn1800\margrsxn1800\margtsxn1440\margbsxn1440\ftnbj\ftnstart1\ftnrstcont\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc
+\pard\plain \ltrpar\s1\cf0{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ql\rtlch\af5\afs24\lang255\ltrch\dbch\af4\afs24\langfe255\loch\f1\fs24\lang1033{\loch\f2\fs24\lang1033\i0\b0\*\cs7\cf0\rtlch\ltrch\dbch\loch\f2\fs24\lang1033 The quick brown fox jumps over the lazy dog}
+\par }
\ No newline at end of file

Added: lucene/nutch/trunk/src/plugin/parse-tika/sample/word97.doc
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/plugin/parse-tika/sample/word97.doc?rev=909269&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/src/plugin/parse-tika/sample/word97.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream