You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/06/03 21:11:56 UTC

svn commit: r179858 [1/3] - in /lucene/nutch/branches/mapred: ./ conf/ site/ src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/parse/ src/java/org/apache/nutch/plugin/ src/java/org/apache/nutch/protocol/ src/java/org/apache/nutch/searcher/ src/java/org/apache/nutch/tools/ src/java/org/apache/nutch/util/ src/plugin/ src/plugin/creativecommons/src/java/org/creativecommons/nutch/ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/ src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/ src/plugin/parse-html/ src/plugin/parse-html/lib/ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/ src/plugin/parse-html/src/test/org/apache/nutch/parse/html/ src/plugin/parse-js/ src/plugin/parse-js/src/ src/plugin/parse-js/src/java/ src/plugin/parse-js/src/java/org/ src/plugin/parse-js/src/java/org/apache/ src/plugin/parse-js/src/java/org/apache/nutch/ src/plugin/parse-js/src/java/org/apache/nutch/parse/ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/ src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/ src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/ src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/ src/plugin/parse-text/src/java/org/apache/nutch/parse/text/ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/ src/plugin/protocol-httpclient/ src/plugin/protocol-httpclient/lib/ src/plugin/protocol-httpclient/src/ src/plugin/protocol-httpclient/src/java/ src/plugin/protocol-httpclient/src/java/org/ src/plugin/protocol-httpclient/src/java/org/apache/ src/plugin/protocol-httpclient/src/java/org/apache/nutch/ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/ src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/ src/site/src/documentation/ src/site/src/documentation/content/xdocs/ src/test/org/apache/nutch/analysis/ src/test/org/apache/nutch/fetcher/ src/test/org/apache/nutch/parse/ src/test/org/apache/nutch/tools/ src/test/org/apache/nutch/util/

Author: cutting
Date: Fri Jun  3 12:11:51 2005
New Revision: 179858

URL: http://svn.apache.org/viewcvs?rev=179858&view=rev
Log:
merge r171187:179837 from trunk

Added:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HTMLMetaTags.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/ParseStatus.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseStatus.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/protocol/ProtocolOutput.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolOutput.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/protocol/ProtocolStatus.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/lib/tagsoup-1.0rc3.jar   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-html/lib/tagsoup-1.0rc3.jar
    lucene/nutch/branches/mapred/src/plugin/parse-html/lib/tagsoup.LICENSE.txt   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-html/lib/tagsoup.LICENSE.txt
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HTMLMetaProcessor.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/XMLCharacterRecognizer.java
    lucene/nutch/branches/mapred/src/plugin/parse-js/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/
    lucene/nutch/branches/mapred/src/plugin/parse-js/build.xml   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-js/build.xml
    lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-js/plugin.xml
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/apache/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/apache/nutch/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/apache/nutch/package.html   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/package.html
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/apache/nutch/parse/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/
      - copied from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/
    lucene/nutch/branches/mapred/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/build.xml   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/build.xml
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/lib/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/lib/commons-codec.jar   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-codec.jar
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0-rc2.jar   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/lib/commons-httpclient-3.0-rc2.jar
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/
      - copied from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummySSLProtocolSocketFactory.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/DummyX509TrustManager.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthentication.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationException.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpError.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpError.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpException.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpException.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/MultiProperties.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/MultiProperties.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/RobotRulesParser.java
    lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html   (props changed)
      - copied unchanged from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html
Removed:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/util/SoftHashMap.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/RobotsMetaProcessor.java
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/util/TestSoftHashMap.java
Modified:
    lucene/nutch/branches/mapred/build.xml
    lucene/nutch/branches/mapred/conf/nutch-default.xml
    lucene/nutch/branches/mapred/site/about.html
    lucene/nutch/branches/mapred/site/about.pdf
    lucene/nutch/branches/mapred/site/bot.html
    lucene/nutch/branches/mapred/site/credits.html
    lucene/nutch/branches/mapred/site/faq.html
    lucene/nutch/branches/mapred/site/i18n.html
    lucene/nutch/branches/mapred/site/index.html
    lucene/nutch/branches/mapred/site/issue_tracking.html
    lucene/nutch/branches/mapred/site/linkmap.html
    lucene/nutch/branches/mapred/site/mailing_lists.html
    lucene/nutch/branches/mapred/site/tutorial.html
    lucene/nutch/branches/mapred/site/version_control.html
    lucene/nutch/branches/mapred/site/version_control.pdf
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.jj
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDatum.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/FetcherOutput.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/indexer/IndexSegment.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilter.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilters.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/Parse.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/ParseData.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/Parser.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/ParserChecker.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/plugin/PluginManifestParser.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/plugin/PluginRepository.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/protocol/Protocol.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/searcher/NutchBean.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/tools/ParseSegment.java
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/tools/UpdateDatabaseTool.java
    lucene/nutch/branches/mapred/src/plugin/build.xml
    lucene/nutch/branches/mapred/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
    lucene/nutch/branches/mapred/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
    lucene/nutch/branches/mapred/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
    lucene/nutch/branches/mapred/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java
    lucene/nutch/branches/mapred/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java
    lucene/nutch/branches/mapred/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
    lucene/nutch/branches/mapred/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
    lucene/nutch/branches/mapred/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
    lucene/nutch/branches/mapred/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
    lucene/nutch/branches/mapred/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
    lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
    lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml
    lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml
    lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml
    lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java
    lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java

Modified: lucene/nutch/branches/mapred/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/build.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/build.xml (original)
+++ lucene/nutch/branches/mapred/build.xml Fri Jun  3 12:11:51 2005
@@ -208,7 +208,9 @@
     	<packageset dir="${plugins.dir}/protocol-file/src/java"/>
     	<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
        	<packageset dir="${plugins.dir}/protocol-http/src/java"/>
+       	<packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
     	<packageset dir="${plugins.dir}/parse-html/src/java"/>
+    	<packageset dir="${plugins.dir}/parse-js/src/java"/>
     	<packageset dir="${plugins.dir}/parse-text/src/java"/>
     	<packageset dir="${plugins.dir}/parse-pdf/src/java"/>
 	<packageset dir="${plugins.dir}/parse-rtf/src/java"/>

Modified: lucene/nutch/branches/mapred/conf/nutch-default.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/conf/nutch-default.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/conf/nutch-default.xml (original)
+++ lucene/nutch/branches/mapred/conf/nutch-default.xml Fri Jun  3 12:11:51 2005
@@ -578,7 +578,7 @@
 
 <property>
   <name>plugin.includes</name>
-  <value>protocol-http|urlfilter-regex|parse-(text|html)|index-basic|query-(basic|site|url)</value>
+  <value>protocol-httpclient|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)</value>
   <description>Regular expression naming plugin directory names to
   include.  Any plugin not matching this expression is excluded.  By
   default Nutch includes crawling just HTML and plain text via HTTP,
@@ -598,6 +598,14 @@
   <value>windows-1252</value>
   <description>The character encoding to fall back to when no other information
   is available</description>
+</property>
+
+<property>
+  <name>parser.html.impl</name>
+  <value>neko</value>
+  <description>HTML Parser implementation. Currently the following keywords
+  are recognized: "neko" uses NekoHTML, "tagsoup" uses TagSoup.
+  </description>
 </property>
 
 <!-- urlfilter plugin properties -->

Modified: lucene/nutch/branches/mapred/site/about.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/about.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/about.html (original)
+++ lucene/nutch/branches/mapred/site/about.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>
@@ -176,7 +176,7 @@
 <a name="N1000C"></a><a name="Overview"></a>
 <h2 class="h3">Overview</h2>
 <div class="section">
-<p>Nutch is open source web-search software.  It builds on <a href="http://jakarta.apache.org/lucene/">Lucene</a>, adding web-specifics, such as a
+<p>Nutch is open source web-search software.  It builds on <a href="http://lucene.apache.org/">Lucene</a>, adding web-specifics, such as a
       crawler, a link-graph database, parsers for HTML and other
       document formats, etc.</p>
 <p>For more information about Nutch, please see the <a href="http://wiki.apache.org/nutch/">Nutch wiki.</a>

Modified: lucene/nutch/branches/mapred/site/about.pdf
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/about.pdf?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/about.pdf (original)
+++ lucene/nutch/branches/mapred/site/about.pdf Fri Jun  3 12:11:51 2005
@@ -47,10 +47,10 @@
 >>
 endobj
 12 0 obj
-<< /Length 1428 /Filter [ /ASCII85Decode /FlateDecode ]
+<< /Length 1425 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-Gasapa`?E"&A@B[E3u[c&GKaRSg;HVP@P<,C(>4n&.+42PCs,CX\uksbtVkBe;VQeJgS)+4ZpApIFt^RS!^\dFi_!n3FomFM$&5[8KE$l"[7)-Lhpktc^d_tab(E'n*g7'jm#k8j$!4)Ck%'[+`Y1ML`O9PBb6@&s6+Q*RLl2NcgnjjZYF'L12bZ5^3aNR;g<L:$[lQCURHY^k'>R8.`VjaWR@EIX08S/5k=ZqAEARYNLTtUFf6+?ReWjT;f>uD`1]a>P9Y?VrO)L+S[YU^3<M,^%+^JF`Z%,']Va4NqV]8XcZc5:L\L--(\Y_1As=<<<cT\?IU*C[f$F'pa?0"7nm"$K$^qPf'8=M2oNg3[iYZ-YRE&"+A&[!1`ICp_H$S$mFsn_+(7HodWAC6iR:\P3EYR]P<6H$!>"E3?^MJE"LiPC63\t&^oOTY]Y3(_P!nBtSW*EtWZ,G:ma].7(Vlup`65.T\;FmTQ[Z__(3iEn5X#T$+Cicr`"LiL94#Dqi0TS\Ullse\P0>T9Uq8X?_)>T(jc"A;5,J;(Ph,s6MZ?=B,;6PEi*>JkUT*l6?3FQD+$Q4H(t3kZ^*:HAF?c38mVk<9(G]$'+$\DFkBLJn%pMMeFh<S'r_^%*`C/Ntn(Zt?_M&M5GMJM^66o)H8q,_:&eXh$L2uke"GjeXR[gFl['$rAW0ulXc-;0eJLn-VRV^HcfgnXhKj2&Hi)K[anbMVCC%H`P*W@`N`P6R_EI=_n<-;3/7Dk2:$2lO#pd*r2'8\;Q7gg"3)-5-6gH6/l]9ONo(E(]7>ll$sTq/fe/oQm\G+i$=<L;rF@EZi)?$B#8!1:V%AGKF><o$s/ea179C4MiE(*P<`/%-Ac>#C.V0MA.F#[%.o5_#9>eZ<I=9c90G>&Ch:;.q6:$WDf:dT9OmkSui5f;F?ko=5R#`T":[!1E9/APG$[0B.j%)UGo3hH.[\YU"qa:fL8VPnP[s3bC?G0R4e[3XXY2VM!6n2atop<!6YkQQ0M,,`:Ve8TB<:0JoYKANV8Vs!NiE8MiT(Ui<].7,Hu;qDhI/mW8O[k[:Y5#C^&CAQsUR$39$->DDc]bmt1Lj^RuUU6MNp"Ne]@[O0kXBp/(W\$TBR"[RYHYr(]jWB\?4Y_CqIAJ90becVXB'5BZ+/X,Fn$/HfsndaJTj<]N'LL;(K9qHcH(f:9EN@k<#JsT@G#3u$/RjOUb#f:b;jadP!9oZ'A.6^[fH(OtJS/l:BlU?ic)t0fi#;D=::fcpDe4kNLgZ(C:`X1W;;mTg?27cOVit/A+^OSm,,`7dpUgUlUYHiiln.g;Gs#K(7aZcsm2n;%UX#'/JDS8"8&OA&E5oPG&ER$rJ&+X[;@UGk@T=rT?OW>f+X%?=O_@..=)@mPB)`(;eZu'XPj'p*cU,@]J#d6@gL:I6akeMgu~>
+Gasapa`?E"&A@B[E3uZ^M#/qd:D.6<8MZ-&22=-rL]g/b,bY6)X-Be`M]3QIQ#ZUD^r#F*LPOK`O5&32',*<t%bVUJL[8(pET#.hW<7KEi)A6gaL3FFh7<0L4F5t3s6fV?Y20:GhNP^rp(m]8M<bTn2Lsu=+<Ji.f'07m&>D%Sjaf@&e]LWugoQQapXjM`nEEshqT'@@3W6&(k?TI8*!^/s9Rk]t^931nae(e\PoiIPEWSe]TjCE-ihnD']kLaaLF-G'NoYKYa6M].3i_ma0%<.R>!I-IDtk._e*<jI!,oUKE`["(pon(+r?9;n/1W`GC;55.2^LSFBK!V#E_`s]@<jJor<7$CSr#dEm/s)c_aH8u&i1Uc`Jt[7%u^Ih7.^8b=6tHVEi.NM[2b_s0\(2tn-c5>ihu1IT+H=4UYF3C=%hY0Bh4f"gg"Gsbo:N9f1b4&\^'C_:@-G?`7a(EC8m<a6o.(8M:BXjZ9pp)<TBf(%P/a23Ee[R[rggq1lWV4O%-ek,LBe\9H3HKi<3S()kYo+6p&7GV#S1QY6"P/J0_"S(Yf.\':fZ&9TS15d)Gb=N0rb>o7Q9<U$'jobO$CrkXrkKm-=m\o,M[6o<<p*=gu#Qn*`]7<D:oK,&`oU#`h$:gi?Wi$]uQk./=D"OGL+W1)-2o+@,S4Y0"6.9C&"(Kq+RFd+l!QlH(gIkKucDc:1-^&T6RdJD^-#M(a'Kc^U9uDt&Nt;uCDY'ZX"_5Zq0R=Z$%YS^@DBN#sl?3Ki$(G-jL?aKUOs%@1H9)7l.X0c(_V]Nm!J!8!XNTIt/G$[/4GX&YMN:15BkA6jE(3<1acrQ0)n*3?`ZW2nUPFk'</``lD^akj8PJ-1^%r/4\jNCCoTE=)9LWOSBS%AkS,F@E5^QTY\8Ub$@hTspOJ/37k`+Hl%p6_aBSMTTDu>!l,Smb25H2,Z*Y'dY].l>$)S/#ItG.4g7KJul&@=II_t_ek"I""Df$i%?ckKKd'W/]Ua$M!8/b>k9@@N,p;a"WkP-R*q8iPdp[a3/3W1RR0=!C,(RXaX)%SUq#7F7\uc1`F^+bJantAq$ec<#m]-=Ub?M`=;We0]:Tn91lW^c5"rIDnbQ"EbWG9-\HW5&!"WOQo(,b/`"0)+hIpJ1ku+Y\ME[)$j+20;9G^Fi(7CM-kb]OW.9sRigPBb<lpJ[H!_@*G)&O"!P^5"a:HS$6CaK]mgC9Hq')XRDSi0M0;i5jlr+MH<BW:;dNn56=WojU@P,+1?loNWjJg&js1`aRBiY#1r?hHFj:/'[ZT'(A"P?:?i-cJ?eHK/=#$Q\.Up;m'mM*D4AbkBi1no1bWE#Q7ZiZLVl\_m^,'U$D&-gC*\iM=E+7!ZOlTo5!Q-T%l$<PuZ^'Z1h"'Z5qK-n%,7[/+4Ya)9:n_p=&rSl8#3Ij2jcHN~>
 endstream
 endobj
 13 0 obj
@@ -76,7 +76,7 @@
 /Rect [ 356.604 629.666 391.92 617.666 ]
 /C [ 0 0 0 ]
 /Border [ 0 0 0 ]
-/A << /URI (http://jakarta.apache.org/lucene/)
+/A << /URI (http://lucene.apache.org/)
 /S /URI >>
 /H /I
 >>
@@ -200,32 +200,32 @@
 xref
 0 27
 0000000000 65535 f 
-0000004364 00000 n 
-0000004429 00000 n 
-0000004521 00000 n 
+0000004353 00000 n 
+0000004418 00000 n 
+0000004510 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
 0000000569 00000 n 
 0000000689 00000 n 
 0000000721 00000 n 
-0000004644 00000 n 
+0000004633 00000 n 
 0000000856 00000 n 
-0000004707 00000 n 
+0000004696 00000 n 
 0000000992 00000 n 
-0000002513 00000 n 
-0000002636 00000 n 
-0000002684 00000 n 
-0000002870 00000 n 
-0000003052 00000 n 
-0000003252 00000 n 
-0000004773 00000 n 
-0000003452 00000 n 
-0000003591 00000 n 
-0000003808 00000 n 
-0000003921 00000 n 
-0000004031 00000 n 
-0000004139 00000 n 
-0000004255 00000 n 
+0000002510 00000 n 
+0000002633 00000 n 
+0000002681 00000 n 
+0000002859 00000 n 
+0000003041 00000 n 
+0000003241 00000 n 
+0000004762 00000 n 
+0000003441 00000 n 
+0000003580 00000 n 
+0000003797 00000 n 
+0000003910 00000 n 
+0000004020 00000 n 
+0000004128 00000 n 
+0000004244 00000 n 
 trailer
 <<
 /Size 27
@@ -233,5 +233,5 @@
 /Info 4 0 R
 >>
 startxref
-4824
+4813
 %%EOF

Modified: lucene/nutch/branches/mapred/site/bot.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/bot.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/bot.html (original)
+++ lucene/nutch/branches/mapred/site/bot.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>

Modified: lucene/nutch/branches/mapred/site/credits.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/credits.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/credits.html (original)
+++ lucene/nutch/branches/mapred/site/credits.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>

Modified: lucene/nutch/branches/mapred/site/faq.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/faq.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/faq.html (original)
+++ lucene/nutch/branches/mapred/site/faq.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>

Modified: lucene/nutch/branches/mapred/site/i18n.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/i18n.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/i18n.html (original)
+++ lucene/nutch/branches/mapred/site/i18n.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>
@@ -257,7 +257,7 @@
 <p>Each item typically includes an HTML anchor, one for each of the
 top-level pages in the translation.</p>
 <p>For example, the header file for an English translation is filed
-as <a href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/include/en/header.xml"><tt>src/web/include/en/header.xml</tt></a>.</p>
+as <a href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/include/en/header.xml"><tt>src/web/include/en/header.xml</tt></a>.</p>
 </div>
 
 
@@ -290,7 +290,7 @@
 entities in your data, you'll need to declare these too.  Look at
 existing translations for examples of this.</p>
 <p>For example, the English language "about" page is filed
-as <a href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/pages/en/about.xml"><tt>src/web/pages/en/about.xml</tt></a>.</p>
+as <a href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/pages/en/about.xml"><tt>src/web/pages/en/about.xml</tt></a>.</p>
 </div>
 
 
@@ -305,10 +305,10 @@
 page.</p>
 <p>These property files are filed as
 <tt>src/web/locale/org/nutch/jsp/<i>page</i>_<i>language</i>.xml</tt>
-where <i>page</i> is the name of the JSP page in <a href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/jsp/"><tt>src/web/jsp/</tt></a>
+where <i>page</i> is the name of the JSP page in <a href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/jsp/"><tt>src/web/jsp/</tt></a>
 and <i>language</i> is the IS0639 language code, as above.</p>
 <p>For example, text for the English language search results page is filed
-as <a href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/locale/org/nutch/jsp/search_en.properties"><tt>src/web/locale/org/nutch/jsp/search_en.properties</tt></a>.
+as <a href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/locale/org/nutch/jsp/search_en.properties"><tt>src/web/locale/org/nutch/jsp/search_en.properties</tt></a>.
  This contains something like:</p>
 <pre>
   title = search results

Modified: lucene/nutch/branches/mapred/site/index.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/index.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/index.html (original)
+++ lucene/nutch/branches/mapred/site/index.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit">

Modified: lucene/nutch/branches/mapred/site/issue_tracking.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/issue_tracking.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/issue_tracking.html (original)
+++ lucene/nutch/branches/mapred/site/issue_tracking.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>

Modified: lucene/nutch/branches/mapred/site/linkmap.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/linkmap.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/linkmap.html (original)
+++ lucene/nutch/branches/mapred/site/linkmap.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>
@@ -259,7 +259,7 @@
 <ul>
     
 <li>
-<a href="http://jakarta.apache.org/lucene/">Lucene</a>&nbsp;&nbsp;&nbsp;_________________________&nbsp;&nbsp;<em>lucene</em>
+<a href="http://lucene.apache.org/">Lucene</a>&nbsp;&nbsp;&nbsp;_________________________&nbsp;&nbsp;<em>lucene</em>
 </li>
   
 </ul>

Modified: lucene/nutch/branches/mapred/site/mailing_lists.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/mailing_lists.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/mailing_lists.html (original)
+++ lucene/nutch/branches/mapred/site/mailing_lists.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>

Modified: lucene/nutch/branches/mapred/site/tutorial.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/tutorial.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/tutorial.html (original)
+++ lucene/nutch/branches/mapred/site/tutorial.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>

Modified: lucene/nutch/branches/mapred/site/version_control.html
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/version_control.html?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/version_control.html (original)
+++ lucene/nutch/branches/mapred/site/version_control.html Fri Jun  3 12:11:51 2005
@@ -20,7 +20,7 @@
     |breadtrail
     +-->
 <div class="breadtrail">
-<a href="http://incubator.apache.org/">Incubator</a> &gt; <a href="http://incubator.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
+<a href="http://lucene.apache.org/">Lucene</a> &gt; <a href="http://lucene.apache.org/nutch/">Nutch</a><script src="skin/breadcrumbs.js" language="JavaScript" type="text/javascript"></script>
 </div>
 <!--+
     |header
@@ -30,7 +30,7 @@
     |start group logo
     +-->
 <div class="grouplogo">
-<a href="http://incubator.apache.org/"><img class="logoImage" alt="Incubator" src="http://incubator.apache.org/images/apache-incubator-logo.png" title="Apache Incubator"></a>
+<a href="http://lucene.apache.org/"><img class="logoImage" alt="Lucene" src="http://lucene.apache.org/java/docs/images/lucene_green_150.gif" title="Apache Lucene"></a>
 </div>
 <!--+
     |end group logo
@@ -39,7 +39,7 @@
     |start Project Logo
     +-->
 <div class="projectlogoA1">
-<a href="http://incubator.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
+<a href="http://lucene.apache.org/nutch/"><img class="logoImage" alt="Nutch" src="images/nutch-logo.gif" title="Open Source Web Search Software"></a>
 </div>
 <!--+
     |end Project Logo
@@ -138,7 +138,7 @@
 <div onclick="SwitchMenu('menu_1.4', 'skin/')" id="menu_1.4Title" class="menutitle">Related Projects</div>
 <div id="menu_1.4" class="menuitemgroup">
 <div class="menuitem">
-<a title="" href="http://jakarta.apache.org/lucene/">Lucene</a>
+<a title="" href="http://lucene.apache.org/">Lucene</a>
 </div>
 </div>
 <div id="credit"></div>
@@ -197,7 +197,7 @@
 <div class="section">
 <p>
         The source code can be browsed via the Web at 
-        <a href="http://svn.apache.org/viewcvs.cgi/incubator/nutch/">http://svn.apache.org/viewcvs.cgi/incubator/nutch/</a>.
+        <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/">http://svn.apache.org/viewcvs.cgi/lucene/nutch/</a>.
         No SVN client software is required.
       </p>
 </div>
@@ -208,7 +208,7 @@
 <div class="section">
 <p>
         The SVN URL for anonymous users is 
-        <a href="http://svn.apache.org/repos/asf/incubator/nutch/">http://svn.apache.org/repos/asf/incubator/nutch/</a>.
+        <a href="http://svn.apache.org/repos/asf/lucene/nutch/">http://svn.apache.org/repos/asf/lucene/nutch/</a>.
         Instructions for anonymous SVN access are 
         <a href="http://www.apache.org/dev/version-control.html#anon-svn">here</a>.
       </p>
@@ -220,7 +220,7 @@
 <div class="section">
 <p>
         The SVN URL for committers is 
-        <a href="https://svn.apache.org/repos/asf/incubator/nutch/">https://svn.apache.org/repos/asf/incubator/nutch/</a>.
+        <a href="https://svn.apache.org/repos/asf/lucene/nutch/">https://svn.apache.org/repos/asf/lucene/nutch/</a>.
         Instructions for committer SVN access are 
         <a href="http://www.apache.org/dev/version-control.html#https-svn">here</a>.
       </p>

Modified: lucene/nutch/branches/mapred/site/version_control.pdf
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/site/version_control.pdf?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/site/version_control.pdf (original)
+++ lucene/nutch/branches/mapred/site/version_control.pdf Fri Jun  3 12:11:51 2005
@@ -72,7 +72,7 @@
 << /Length 1528 /Filter [ /ASCII85Decode /FlateDecode ]
  >>
 stream
-Gatm<96...@Egh>,+O>bGm`d?_V"#oSPZKn2bspjl]*@#GNS4Y@VL+UNXgaUNb$r\eN,A68UA:ap:)h,h5>O\/dqY&-j.:PD]FH,;1q:DCkPYk]g*u&fd$oO_%]?JL,%p.Z=Kk.OpXe<.POZQakU3EE=17`l@9JB!7]oa$2'r;`c#k85T%u<9?iA=s5;6b<`1FWcRD#k*fdEk'+"b"qn8iD4j22ZDQfUjK/'DcpK[11C3M"K]<Q-`pDB&i*.+L#,Z^ppj\'AkAf4ZNh*44>:KLVl/=qXei10/XZW;]UP<(qOS]t^pTaB8_V&/Lie1p&T?DQ9erPhMLa#F,V@:TRZmIZPj3S-R(,JS(b6[3S(ll&T;l;]7e/"Ea5H!Y+515-2p0C'WkINn-u'FWTOdRRB^H.h#=Gg_@E^WN['9)bs8-""?S/sIZLKQ1ct?l<ujMXl$C.eQ"P%>B?Ep3AYjRZX)35Q0k\QKbY?Q%B#u-K+N<MNXhKdNHIBCk/>^-@<VH5r*.>%4IMiVj!k.Lut[_RZ\<'((1qNROo4gS+]Y5QV,FNe$*5Yl'.1K:1",\Q1"T2^a'-A$%+W>#4Ptd#JmbN\d,Lq34Dsdhf$<NrUFhdfNbWTIblY[=.(JE'Lf?-rD\,n"bCqtq[?k@@0(4F;QCjV@"lk5k47&K365@rf*`d10ol.RqjrD\HAa,8!.5kTOp)8W%(*lBM^aTA1.WaW^jonI?ACuJ?k!r_/Du^L&5^rF41,9uj1'MM%?MiMdPmh\^(#bsXK55#]jMJ*",Q'Y\JhCO#43L[Et't2E>JX99$I@)*/Q,%Hk6*o<sQV^HPG*+^[uHW^#5bfB`nsm9(VG;a)sJ/I<Z3Z(sJQI!,o(M8WKBG^?0Y=O[74_@:0_4^dhau^VcP);+>AG]^Fqk*0Z16MbG3nXX%E_lFVOG]+MYfRBJR'?p,.b>KEDVbc.Yjp<`in0R.Dnj&U1JWcFBUWJR`mKuHrb>?PSKo5&?"2A#U)gJ9.^~>
+Gatm<96...@A>K.c8c9"'AQ*5[0N'.Y\mE?8J]Pq._W(b"R`FX!(j_Nr$'YO_RF6M^IOMI*WZ8XcR(d(%T*\Vsjo<9.&d#J7@2<LIJnU!!Z(I</^TmVB;-n5[!,5F71qSD_?QdPN@IL0$)k'5Jl^;!^9.%nMd+C(8%XX+<V-gO&\FY)0ZCKUVmS6^QF>Hd.aadF]C-.WJ=?!s%EBhgC%n&Pt(94XL-W?M-RO=fSA(^OMT#;&+J`(:asaibLc[;5;J%?IPWKapundLGBdUQb)#GFfDUYZ,8*mdOm0c@'m#,&]),R9)nC;JZh6d,X@L-T$eb-=3HZ(j*r94]#*5lXge.!F<NM?EA[-A0[odBe/*e`oPjQ`.#3,UaD.Oo?~>
 endstream
 endobj
 17 0 obj
@@ -156,10 +156,10 @@
 24 0 obj
 << /Type /Annot
 /Subtype /Link
-/Rect [ 90.0 498.132 332.976 486.132 ]
+/Rect [ 90.0 498.132 318.972 486.132 ]
 /C [ 0 0 0 ]
 /Border [ 0 0 0 ]
-/A << /URI (http://svn.apache.org/viewcvs.cgi/incubator/nutch/)
+/A << /URI (http://svn.apache.org/viewcvs.cgi/lucene/nutch/)
 /S /URI >>
 /H /I
 >>
@@ -167,10 +167,10 @@
 25 0 obj
 << /Type /Annot
 /Subtype /Link
-/Rect [ 279.648 445.798 508.956 433.798 ]
+/Rect [ 279.648 445.798 494.952 433.798 ]
 /C [ 0 0 0 ]
 /Border [ 0 0 0 ]
-/A << /URI (http://svn.apache.org/repos/asf/incubator/nutch/)
+/A << /URI (http://svn.apache.org/repos/asf/lucene/nutch/)
 /S /URI >>
 /H /I
 >>
@@ -189,10 +189,10 @@
 27 0 obj
 << /Type /Annot
 /Subtype /Link
-/Rect [ 250.656 367.064 484.632 355.064 ]
+/Rect [ 250.656 367.064 470.628 355.064 ]
 /C [ 0 0 0 ]
 /Border [ 0 0 0 ]
-/A << /URI (https://svn.apache.org/repos/asf/incubator/nutch/)
+/A << /URI (https://svn.apache.org/repos/asf/lucene/nutch/)
 /S /URI >>
 /H /I
 >>
@@ -200,7 +200,7 @@
 28 0 obj
 << /Type /Annot
 /Subtype /Link
-/Rect [ 297.288 353.864 317.94 341.864 ]
+/Rect [ 237.624 353.864 258.276 341.864 ]
 /C [ 0 0 0 ]
 /Border [ 0 0 0 ]
 /A << /URI (http://www.apache.org/dev/version-control.html#https-svn)
@@ -322,21 +322,21 @@
 xref
 0 39
 0000000000 65535 f 
-0000006626 00000 n 
-0000006691 00000 n 
-0000006783 00000 n 
+0000006618 00000 n 
+0000006683 00000 n 
+0000006775 00000 n 
 0000000015 00000 n 
 0000000071 00000 n 
 0000000669 00000 n 
 0000000789 00000 n 
 0000000835 00000 n 
-0000006906 00000 n 
+0000006898 00000 n 
 0000000970 00000 n 
-0000006969 00000 n 
+0000006961 00000 n 
 0000001107 00000 n 
-0000007035 00000 n 
+0000007027 00000 n 
 0000001244 00000 n 
-0000007101 00000 n 
+0000007093 00000 n 
 0000001381 00000 n 
 0000003002 00000 n 
 0000003125 00000 n 
@@ -346,20 +346,20 @@
 0000003785 00000 n 
 0000003967 00000 n 
 0000004145 00000 n 
-0000004346 00000 n 
-0000004548 00000 n 
-0000004756 00000 n 
-0000004959 00000 n 
-0000007167 00000 n 
-0000005168 00000 n 
-0000005307 00000 n 
-0000005540 00000 n 
-0000005809 00000 n 
-0000006070 00000 n 
-0000006183 00000 n 
-0000006293 00000 n 
-0000006401 00000 n 
-0000006517 00000 n 
+0000004343 00000 n 
+0000004542 00000 n 
+0000004750 00000 n 
+0000004950 00000 n 
+0000007159 00000 n 
+0000005160 00000 n 
+0000005299 00000 n 
+0000005532 00000 n 
+0000005801 00000 n 
+0000006062 00000 n 
+0000006175 00000 n 
+0000006285 00000 n 
+0000006393 00000 n 
+0000006509 00000 n 
 trailer
 <<
 /Size 39
@@ -367,5 +367,5 @@
 /Info 4 0 R
 >>
 startxref
-7218
+7210
 %%EOF

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.java Fri Jun  3 12:11:51 2005
@@ -122,8 +122,11 @@
       nonOpOrTerm();
       String[] array = (String[])terms.toArray(new String[terms.size()]);
 
-      if (stop && terms.size()==1 && isStopWord(array[0])) {
-        // ignore stop words only when single, unadorned terms
+      if (stop
+          && field == Clause.DEFAULT_FIELD
+          && terms.size()==1
+          && isStopWord(array[0])) {
+        // ignore stop words only when single, unadorned terms in default field
       } else {
         if (prohibited)
           query.addProhibitedPhrase(array, field);
@@ -443,35 +446,11 @@
     finally { jj_save(2, xla); }
   }
 
-  final private boolean jj_3R_17() {
-    Token xsp;
-    xsp = jj_scanpos;
-    if (jj_scan_token(10)) {
-    jj_scanpos = xsp;
-    if (jj_scan_token(11)) {
-    jj_scanpos = xsp;
-    if (jj_scan_token(12)) {
-    jj_scanpos = xsp;
-    if (jj_scan_token(13)) {
-    jj_scanpos = xsp;
-    if (jj_scan_token(14)) return true;
-    }
-    }
-    }
-    }
-    return false;
-  }
-
   final private boolean jj_3R_26() {
     if (jj_3R_16()) return true;
     return false;
   }
 
-  final private boolean jj_3R_8() {
-    if (jj_3R_14()) return true;
-    return false;
-  }
-
   final private boolean jj_3R_16() {
     Token xsp;
     xsp = jj_scanpos;
@@ -496,6 +475,11 @@
     return false;
   }
 
+  final private boolean jj_3R_8() {
+    if (jj_3R_14()) return true;
+    return false;
+  }
+
   final private boolean jj_3R_15() {
     if (jj_3R_11()) return true;
     Token xsp;
@@ -524,18 +508,6 @@
     return false;
   }
 
-  final private boolean jj_3_1() {
-    if (jj_scan_token(WORD)) return true;
-    if (jj_scan_token(COLON)) return true;
-    Token xsp;
-    xsp = jj_scanpos;
-    if (jj_3R_8()) {
-    jj_scanpos = xsp;
-    if (jj_3R_9()) return true;
-    }
-    return false;
-  }
-
   final private boolean jj_3R_23() {
     if (jj_3R_24()) return true;
     return false;
@@ -562,6 +534,18 @@
     return false;
   }
 
+  final private boolean jj_3_1() {
+    if (jj_scan_token(WORD)) return true;
+    if (jj_scan_token(COLON)) return true;
+    Token xsp;
+    xsp = jj_scanpos;
+    if (jj_3R_8()) {
+    jj_scanpos = xsp;
+    if (jj_3R_9()) return true;
+    }
+    return false;
+  }
+
   final private boolean jj_3R_24() {
     Token xsp;
     xsp = jj_scanpos;
@@ -610,13 +594,13 @@
     return false;
   }
 
-  final private boolean jj_3R_9() {
-    if (jj_3R_15()) return true;
+  final private boolean jj_3R_19() {
+    if (jj_3R_24()) return true;
     return false;
   }
 
-  final private boolean jj_3R_19() {
-    if (jj_3R_24()) return true;
+  final private boolean jj_3R_9() {
+    if (jj_3R_15()) return true;
     return false;
   }
 
@@ -651,6 +635,25 @@
     if (jj_scan_token(9)) {
     jj_scanpos = xsp;
     if (jj_scan_token(0)) return true;
+    }
+    return false;
+  }
+
+  final private boolean jj_3R_17() {
+    Token xsp;
+    xsp = jj_scanpos;
+    if (jj_scan_token(10)) {
+    jj_scanpos = xsp;
+    if (jj_scan_token(11)) {
+    jj_scanpos = xsp;
+    if (jj_scan_token(12)) {
+    jj_scanpos = xsp;
+    if (jj_scan_token(13)) {
+    jj_scanpos = xsp;
+    if (jj_scan_token(14)) return true;
+    }
+    }
+    }
     }
     return false;
   }

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.jj
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.jj?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.jj (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/analysis/NutchAnalysis.jj Fri Jun  3 12:11:51 2005
@@ -204,8 +204,11 @@
     {
       String[] array = (String[])terms.toArray(new String[terms.size()]);
 
-      if (stop && terms.size()==1 && isStopWord(array[0])) {
-        // ignore stop words only when single, unadorned terms
+      if (stop
+          && field == Clause.DEFAULT_FIELD
+          && terms.size()==1
+          && isStopWord(array[0])) {
+        // ignore stop words only when single, unadorned terms in default field
       } else {
         if (prohibited)
           query.addProhibitedPhrase(array, field);

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDatum.java Fri Jun  3 12:11:51 2005
@@ -38,8 +38,8 @@
   public static final byte STATUS_DB_GONE = 3;
   public static final byte STATUS_LINKED = 4;
   public static final byte STATUS_FETCH_SUCCESS = 5;
-  public static final byte STATUS_FETCH_FAIL_TEMP = 6;
-  public static final byte STATUS_FETCH_FAIL_PERM = 7;
+  public static final byte STATUS_FETCH_RETRY = 6;
+  public static final byte STATUS_FETCH_GONE = 7;
 
   private byte status;
   private long nextFetch = System.currentTimeMillis();

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Fri Jun  3 12:11:51 2005
@@ -77,7 +77,7 @@
       result.setStatus(CrawlDatum.STATUS_DB_FETCHED);
       break;
 
-    case CrawlDatum.STATUS_FETCH_FAIL_TEMP:       // temporary failure
+    case CrawlDatum.STATUS_FETCH_RETRY:           // temporary failure
       result = highest;                           // use new entry
       if (highest.getRetriesSinceFetch() < retryMax) {
         result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
@@ -86,7 +86,7 @@
       }
       break;
 
-    case CrawlDatum.STATUS_FETCH_FAIL_PERM:       // permanent failure
+    case CrawlDatum.STATUS_FETCH_GONE:            // permanent failure
       result = highest;                           // use new entry
       result.setStatus(CrawlDatum.STATUS_DB_GONE);
       break;

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java Fri Jun  3 12:11:51 2005
@@ -39,6 +39,7 @@
   private OutputCollector output;
 
   private int activeThreads;
+  private int maxRedirect;
 
   private long start = System.currentTimeMillis(); // start time of fetcher run
 
@@ -70,21 +71,63 @@
           try {
             LOG.info("fetching " + url);            // fetch the page
             
-            Protocol protocol = ProtocolFactory.getProtocol(url);
-            Content content = protocol.getContent(url);
+            boolean redirecting;
+            int redirectCount = 0;
+            do {
+              redirecting = false;
+              LOG.fine("redirectCount=" + redirectCount);
+              Protocol protocol = ProtocolFactory.getProtocol(url);
+              ProtocolOutput output = protocol.getProtocolOutput(url);
+              ProtocolStatus status = output.getStatus();
+              Content content = output.getContent();
+
+              switch(status.getCode()) {
+
+              case ProtocolStatus.SUCCESS:        // got a page
+                output(key, datum, content, CrawlDatum.STATUS_FETCH_SUCCESS);
+                updateStatus(content.getContent().length);
+                break;
+
+              case ProtocolStatus.MOVED:         // redirect
+              case ProtocolStatus.TEMP_MOVED:
+                url = status.getMessage();
+                if (url != null) {
+                  redirecting = true;
+                  redirectCount++;
+                  LOG.info(" - protocol redirect to " + url);
+                }
+                break;
+
+              case ProtocolStatus.RETRY:          // retry
+              case ProtocolStatus.EXCEPTION:
+                output(key, datum, null, CrawlDatum.STATUS_FETCH_RETRY);
+                break;
+                
+              case ProtocolStatus.GONE:           // gone
+              case ProtocolStatus.NOT_FOUND:
+              case ProtocolStatus.ACCESS_DENIED:
+              case ProtocolStatus.ROBOTS_DENIED:
+              case ProtocolStatus.NOTMODIFIED:
+                output(key, datum, null, CrawlDatum.STATUS_FETCH_GONE);
+                break;
+
+              default:
+                LOG.warning("Unknown ProtocolStatus: " + status.getCode());
+                output(key, datum, null, CrawlDatum.STATUS_FETCH_GONE);
+              }
+
+              if (redirecting && redirectCount >= maxRedirect) {
+                LOG.info(" - redirect count exceeded " + url);
+                output(key, datum, null, CrawlDatum.STATUS_FETCH_GONE);
+              }
 
-            output(url, datum, content, CrawlDatum.STATUS_FETCH_SUCCESS);
-            
-            updateStatus(content.getContent().length);
+            } while (redirecting && (redirectCount < maxRedirect));
 
-          } catch (ResourceGone e) {                // don't retry
-            logError(url, e);
-            output(url, datum, null, CrawlDatum.STATUS_FETCH_FAIL_PERM);
             
-          } catch (Throwable t) {                   // retry all others
+          } catch (Throwable t) {                 // unexpected exception
             logError(url, t);
-            output(url, datum, null, CrawlDatum.STATUS_FETCH_FAIL_TEMP);
-
+            output(key, datum, null, CrawlDatum.STATUS_FETCH_GONE);
+            
           }
         }
 
@@ -103,13 +146,18 @@
       }
     }
 
-    private void output(String url, CrawlDatum datum,
+    private void output(UTF8 key, CrawlDatum datum,
                         Content content, int status) {
+
       datum.setStatus(status);
-      if (content == null)
+
+      if (content == null) {
+        String url = key.toString();
         content = new Content(url, url, new byte[0], "", new Properties());
+      }
+
       try {
-        output.collect(new UTF8(url), new FetcherOutput(datum, content));
+        output.collect(key, new FetcherOutput(datum, content));
       } catch (IOException e) {
         LOG.severe("fetcher caught:"+e.toString());
       }
@@ -152,6 +200,8 @@
     this.input = input;
     this.output = output;
 			
+    this.maxRedirect = getConf().getInt("http.redirect.max", 3);
+    
     int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
     for (int i = 0; i < threadCount; i++) {       // spawn threads
       new FetcherThread().start();

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/ParseSegment.java Fri Jun  3 12:11:51 2005
@@ -49,15 +49,22 @@
   public void map(WritableComparable key, Writable value,
                   OutputCollector output) throws IOException {
     Content content = (Content)value;
+
+    Parse parse = null;
+    ParseStatus status;
     try {
       Parser parser = ParserFactory.getParser(content.getContentType(),
                                               content.getBaseUrl());
-      Parse parse = parser.getParse(content);
-      
+      parse = parser.getParse(content);
+      status = parse.getData().getStatus();
+    } catch (Exception e) {
+      status = new ParseStatus(e);
+    }
+
+    if (status.isSuccess()) {
       output.collect(key, new ParseImpl(parse.getText(), parse.getData()));
-      
-    } catch (ParseException t) {
-      LOG.warning("Error parsing: "+key+": "+t.toString());
+    } else {
+      LOG.warning("Error parsing: "+key+": "+status.toString());
     }
   }
 

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/Fetcher.java Fri Jun  3 12:11:51 2005
@@ -20,6 +20,7 @@
 import java.io.File;
 import java.util.Properties;
 
+import org.apache.nutch.net.URLFilters;
 import org.apache.nutch.pagedb.FetchListEntry;
 import org.apache.nutch.io.*;
 import org.apache.nutch.db.*;
@@ -68,6 +69,10 @@
 
   private int threadCount =                       // max number of threads
     NutchConf.get().getInt("fetcher.threads.fetch", 10);
+  private static final float NEW_INJECTED_PAGE_SCORE =
+    NutchConf.get().getFloat("db.score.injected", 2.0f);
+  private static final int MAX_REDIRECT =
+    NutchConf.get().getInt("http.redirect.max", 3);
 
   // All threads (FetcherThread or thread started by it) belong to
   // group "fetcher". Each FetcherThread is named as "fetcherXX",
@@ -110,45 +115,84 @@
           if (!fle.getFetch()) {                  // should we fetch this page?
             if (LOG.isLoggable(Level.FINE))
               LOG.fine("not fetching " + url);
-            handleNoFetch(fle, FetcherOutput.SUCCESS);
+            handleNoFetch(fle, ProtocolStatus.STATUS_NOTFETCHING);
             continue;
           }
 
-          LOG.info("fetching " + url);            // fetch the page
-
-          Protocol protocol = ProtocolFactory.getProtocol(url);
-          Content content = protocol.getContent(url);
-
-          handleFetch(url, fle, content);
-
-          synchronized (Fetcher.this) {           // update status
-            pages++;
-            bytes += content.getContent().length;
-            if ((pages % 100) == 0) {             // show status every 100pp
-              status();
+          // support multiple redirects, if requested by protocol
+          // or content meta-tags (the latter requires running Fetcher
+          // in parsing mode). Protocol-level redirects take precedence over
+          // content-level redirects. Some plugins can handle redirects
+          // automatically, so that only the final success or failure will be
+          // shown here.
+          boolean refetch = false;
+          int redirCnt = 0;
+          do {
+            LOG.fine("redirCnt=" + redirCnt);
+            refetch = false;
+            LOG.info("fetching " + url);            // fetch the page
+            Protocol protocol = ProtocolFactory.getProtocol(url);
+            ProtocolOutput output = protocol.getProtocolOutput(fle);
+            ProtocolStatus pstat = output.getStatus();
+            Content content = output.getContent();
+            switch(pstat.getCode()) {
+              case ProtocolStatus.SUCCESS:
+                if (content != null) {
+                  synchronized (Fetcher.this) {           // update status
+                    pages++;
+                    bytes += content.getContent().length;
+                    if ((pages % 100) == 0) {             // show status every 100pp
+                      status();
+                    }
+                  }
+                  ParseStatus ps = handleFetch(url, fle, output);
+                  if (ps != null && ps.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
+                    url = ps.getMessage();
+                    url = URLFilters.filter(url);
+                    if (url != null) {
+                      refetch = true;
+                      redirCnt++;
+                      fle = new FetchListEntry(true, new Page(url, NEW_INJECTED_PAGE_SCORE), new String[0]);
+                      LOG.info(" - content redirect to " + url);
+                    }
+                  }
+                }
+                break;
+              case ProtocolStatus.MOVED: // try to redirect immediately
+              case ProtocolStatus.TEMP_MOVED: // try to redirect immediately
+                // record the redirect. perhaps the DB will want to know this.
+                handleNoFetch(fle, pstat);
+                url = pstat.getMessage();
+                if (url != null) {
+                  refetch = true;
+                  redirCnt++;
+                  // create new entry.
+                  fle = new FetchListEntry(true, new Page(url, NEW_INJECTED_PAGE_SCORE), new String[0]);
+                  LOG.info(" - protocol redirect to " + url);
+                }
+                break;
+              case ProtocolStatus.GONE:
+              case ProtocolStatus.NOT_FOUND:
+              case ProtocolStatus.ACCESS_DENIED:
+              case ProtocolStatus.ROBOTS_DENIED:
+              case ProtocolStatus.RETRY:
+              case ProtocolStatus.NOTMODIFIED:
+                handleNoFetch(fle, pstat);
+                break;
+              case ProtocolStatus.EXCEPTION:
+                logError(url, fle, new Exception(pstat.getMessage()));                // retry?
+                handleNoFetch(fle, pstat);
+              break;
+              default:
+                LOG.warning("Unknown ProtocolStatus: " + pstat.getCode());
+                handleNoFetch(fle, pstat);
             }
-          }
-        } catch (ResourceGone e) {                // don't retry
-          logError(url, fle, e);
-          handleNoFetch(fle, FetcherOutput.NOT_FOUND);
-
-        // dealt with in handleFetch() below
-        //} catch (ParseException e) {              // don't retry
-        //  logError(url, fle, e);
-        //  handleNoFetch(fle, FetcherOutput.CANT_PARSE);
-
-        } catch (RetryLater e) {                  // explicit retry
-          logError(url, fle, e);
-          handleNoFetch(fle, FetcherOutput.RETRY);
-
-        } catch (ProtocolException e) {           // implicit retry
-          logError(url, fle, e);
-          handleNoFetch(fle, FetcherOutput.RETRY);
+          } while (refetch && (redirCnt < MAX_REDIRECT));
 
         } catch (Throwable t) {                   // an unchecked exception
           if (fle != null) {
             logError(url, fle, t);                // retry?
-            handleNoFetch(fle, FetcherOutput.RETRY);
+            handleNoFetch(fle, new ProtocolStatus(t));
           }
         }
       }
@@ -176,36 +220,44 @@
       }
     }
 
-    private void handleFetch(String url, FetchListEntry fle, Content content) {
+    private ParseStatus handleFetch(String url, FetchListEntry fle, ProtocolOutput output) {
+      Content content = output.getContent();
+      ProtocolStatus protocolStatus = output.getStatus();
       if (!Fetcher.this.parsing) {
         outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
-                                    FetcherOutput.SUCCESS),
+                protocolStatus),
                 content, null, null);
-        return;
+        return null;
       }
 
-      try {
         String contentType = content.getContentType();
-        Parser parser = ParserFactory.getParser(contentType, url);
-        Parse parse = parser.getParse(content);
-        outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
-                                    FetcherOutput.SUCCESS),
-                content, new ParseText(parse.getText()), parse.getData());
-      } catch (ParseException e) {
-        // 20041026, xing
-        // If fetching succeeds, but parsing fails, content should be saved
-        // so that we can try to parse again in separate pass, possibly
-        // using better/alternative parser.
-        LOG.info("fetch okay, but can't parse " + url + ", reason: "
-          + e.getMessage());
-        outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
-                                    FetcherOutput.CANT_PARSE),
-                content, new ParseText(""),
-                new ParseData("", new Outlink[0], new Properties()));
-      }
+        Parser parser = null;
+        Parse parse = null;
+        ParseStatus status = null;
+        try {
+          parser = ParserFactory.getParser(contentType, url);
+          parse = parser.getParse(content);
+          status = parse.getData().getStatus();
+        } catch (Exception e) {
+          e.printStackTrace();
+          status = new ParseStatus(e);
+        }
+        if (status.isSuccess()) {
+          outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
+                  protocolStatus),
+                  content, new ParseText(parse.getText()), parse.getData());
+        } else {
+          LOG.info("fetch okay, but can't parse " + url + ", reason: "
+                  + status.toString());
+          outputPage(new FetcherOutput(fle, MD5Hash.digest(content.getContent()),
+                  protocolStatus),
+                  content, new ParseText(""),
+                  new ParseData(status, "", new Outlink[0], new Properties()));
+        }
+        return status;
     }
 
-    private void handleNoFetch(FetchListEntry fle, int status) {
+    private void handleNoFetch(FetchListEntry fle, ProtocolStatus status) {
       String url = fle.getPage().getURL().toString();
       MD5Hash hash = MD5Hash.digest(url);
 
@@ -213,7 +265,7 @@
         outputPage(new FetcherOutput(fle, hash, status),
                    new Content(url, url, new byte[0], "", new Properties()),
                    new ParseText(""),
-                   new ParseData("", new Outlink[0], new Properties()));
+                   new ParseData(ParseStatus.STATUS_NOTPARSED, "", new Outlink[0], new Properties()));
       } else {
         outputPage(new FetcherOutput(fle, hash, status),
                    new Content(url, url, new byte[0], "", new Properties()),
@@ -234,6 +286,7 @@
         }
       } catch (Throwable t) {
         LOG.severe("error writing output:" + t.toString());
+        t.printStackTrace();
       }
     }
                                        
@@ -429,7 +482,7 @@
     }
 
     // set log level
-    fetcher.setLogLevel(Level.parse(logLevel.toUpperCase()));
+    setLogLevel(Level.parse(logLevel.toUpperCase()));
 
     if (showThreadID) {
       LogFormatter.setShowThreadIDs(showThreadID);

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/FetcherOutput.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/FetcherOutput.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/FetcherOutput.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/fetcher/FetcherOutput.java Fri Jun  3 12:11:51 2005
@@ -26,6 +26,8 @@
 import org.apache.nutch.pagedb.FetchListEntry;
 import org.apache.nutch.tools.UpdateDatabaseTool;
 import org.apache.nutch.parse.Outlink;
+import org.apache.nutch.parse.ParseStatus;
+import org.apache.nutch.protocol.ProtocolStatus;
 
 /*********************************************
  * An entry in the fetcher's output.  This includes all of the fetcher output
@@ -50,25 +52,34 @@
   public static final String DONE_NAME = "fetcher.done";
   public static final String ERROR_NAME = "fetcher.error";
 
-  private final static byte VERSION = 4;
+  private final static byte VERSION = 5;
 
-  public final static byte RETRY = 0;
-  public final static byte SUCCESS = 1;
-  public final static byte NOT_FOUND = 2;
-  public final static byte CANT_PARSE = 4; // fetched, but can't be parsed
+  // backwards compatibility codes
+  private final static byte RETRY = 0;
+  private final static byte SUCCESS = 1;
+  private final static byte NOT_FOUND = 2;
+  private final static byte CANT_PARSE = 4; // fetched, but can't be parsed
+  
+  private static final byte[] oldToNewMap = {
+          ProtocolStatus.RETRY,
+          ProtocolStatus.SUCCESS,
+          ProtocolStatus.NOT_FOUND,
+          ProtocolStatus.FAILED,
+          ProtocolStatus.RETRY
+  };
 
   private FetchListEntry fetchListEntry;
   private MD5Hash md5Hash;
-  private int status;
+  private ProtocolStatus protocolStatus;
   private long fetchDate;
 
   public FetcherOutput() {}
 
   public FetcherOutput(FetchListEntry fetchListEntry,
-                       MD5Hash md5Hash, int status) {
+                       MD5Hash md5Hash, ProtocolStatus protocolStatus) {
     this.fetchListEntry = fetchListEntry;
     this.md5Hash = md5Hash;
-    this.status = status;
+    this.protocolStatus = protocolStatus;
     this.fetchDate = System.currentTimeMillis();
   }
 
@@ -78,7 +89,12 @@
     byte version = in.readByte();                 // read version
     fetchListEntry = FetchListEntry.read(in);
     md5Hash = MD5Hash.read(in);
-    status = in.readByte();
+    if (version < 5) {
+      int status = in.readByte();
+      protocolStatus = new ProtocolStatus(oldToNewMap[status]);
+    } else {
+      protocolStatus = ProtocolStatus.read(in);
+    }
 
     if (version < 4) {
       UTF8.readString(in);                        // read & ignore title
@@ -95,7 +111,7 @@
     out.writeByte(VERSION);                       // store current version
     fetchListEntry.write(out);
     md5Hash.write(out);
-    out.writeByte(status);
+    protocolStatus.write(out);
     out.writeLong(fetchDate);
   }
 
@@ -110,8 +126,8 @@
   //
   public FetchListEntry getFetchListEntry() { return fetchListEntry; }
   public MD5Hash getMD5Hash() { return md5Hash; }
-  public int getStatus() { return status; }
-  public void setStatus(int status) { this.status = status; }
+  public ProtocolStatus getProtocolStatus() { return protocolStatus; }
+  public void setProtocolStatus(ProtocolStatus protocolStatus) { this.protocolStatus = protocolStatus; }
   public long getFetchDate() { return fetchDate; }
   public void setFetchDate(long fetchDate) { this.fetchDate = fetchDate; }
 
@@ -126,7 +142,7 @@
     return
       this.fetchListEntry.equals(other.fetchListEntry) &&
       this.md5Hash.equals(other.md5Hash) &&
-      (this.status == other.status);
+      this.protocolStatus.equals(other.protocolStatus);
   }
 
 
@@ -134,7 +150,7 @@
     StringBuffer buffer = new StringBuffer();
     buffer.append("FetchListEntry: " + fetchListEntry + "Fetch Result:\n" );
     buffer.append("MD5Hash: " + md5Hash + "\n" );
-    buffer.append("Status: " + status + "\n" );
+    buffer.append("ProtocolStatus: " + protocolStatus + "\n" );
     buffer.append("FetchDate: " + new Date(fetchDate) + "\n" );
     return buffer.toString();
   }

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/indexer/IndexSegment.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/indexer/IndexSegment.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/indexer/IndexSegment.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/indexer/IndexSegment.java Fri Jun  3 12:11:51 2005
@@ -134,7 +134,7 @@
             if (!sr.next(fetcherOutput, null, parseText, parseData)) continue;
 
               // only index the page if it was fetched correctly
-              if (fetcherOutput.getStatus() != FetcherOutput.SUCCESS) {
+              if (!fetcherOutput.getProtocolStatus().isSuccess()) {
                   continue;                              
               }
 

Copied: lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HTMLMetaTags.java (from r179837, lucene/nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java)
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HTMLMetaTags.java?p2=lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HTMLMetaTags.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java&r1=179837&r2=179858&rev=179858&view=diff
==============================================================================
    (empty)

Propchange: lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HTMLMetaTags.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilter.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilter.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilter.java Fri Jun  3 12:11:51 2005
@@ -30,6 +30,5 @@
 
   /** Adds metadata or otherwise modifies a parse of HTML content, given
    * the DOM tree of a page. */
-  Parse filter(Content content, Parse parse, DocumentFragment doc)
-    throws ParseException;
+  Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc);
 }

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilters.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilters.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilters.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/HtmlParseFilters.java Fri Jun  3 12:11:51 2005
@@ -45,11 +45,11 @@
   private  HtmlParseFilters() {}                  // no public ctor
 
   /** Run all defined filters. */
-  public static Parse filter(Content content,Parse parse,DocumentFragment doc)
-    throws ParseException {
+  public static Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) {
 
     for (int i = 0 ; i < CACHE.length; i++) {
-      parse = CACHE[i].filter(content, parse, doc);
+      parse = CACHE[i].filter(content, parse, metaTags, doc);
+      if (!parse.getData().getStatus().isSuccess()) break;
     }
 
     return parse;

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/Parse.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/Parse.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/Parse.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/parse/Parse.java Fri Jun  3 12:11:51 2005
@@ -20,6 +20,7 @@
  * @see Parser#getParse(FetcherOutput,Content)
  */
 public interface Parse {
+  
   /** The textual content of the page. This is indexed, searched, and used when
    * generating snippets.*/ 
   String getText();