You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/06/03 21:11:56 UTC
svn commit: r179858 [3/3] - in /lucene/nutch/branches/mapred: ./ conf/ site/
src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/crawl/
src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/
src/java/org/apache/nutch/parse/ src/java/org/apache/nutch/plugin/
src/java/org/apache/nutch/protocol/ src/java/org/apache/nutch/searcher/
src/java/org/apache/nutch/tools/ src/java/org/apache/nutch/util/
src/plugin/ src/plugin/creativecommons/src/java/org/creativecommons/nutch/
src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/
src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/
src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/
src/plugin/parse-html/ src/plugin/parse-html/lib/
src/plugin/parse-html/src/java/org/apache/nutch/parse/html/
src/plugin/parse-html/src/test/org/apache/nutch/parse/html/
src/plugin/parse-js/ src/plugin/parse-js/src/ src/plugin/parse-js/src/java/
src/plugin/parse-js/src/java/org/ src/plugin/parse-js/src/java/org/apache/
src/plugin/parse-js/src/java/org/apache/nutch/
src/plugin/parse-js/src/java/org/apache/nutch/parse/
src/plugin/parse-js/src/java/org/apache/nutch/parse/js/
src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/
src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/
src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/
src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/
src/plugin/parse-text/src/java/org/apache/nutch/parse/text/
src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/
src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/
src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/
src/plugin/protocol-httpclient/ src/plugin/protocol-httpclient/lib/
src/plugin/protocol-httpclient/src/ src/plugin/protocol-httpclient/src/java/
src/plugin/protocol-httpclient/src/java/org/
src/plugin/protocol-httpclient/src/java/org/apache/
src/plugin/protocol-httpclient/src/java/org/apache/nutch/
src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/
src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/
src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/
src/site/src/documentation/ src/site/src/documentation/content/xdocs/
src/test/org/apache/nutch/analysis/ src/test/org/apache/nutch/fetcher/
src/test/org/apache/nutch/parse/ src/test/org/apache/nutch/tools/
src/test/org/apache/nutch/util/
Copied: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html (from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html)
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html?p2=lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html&p1=lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html&r1=179837&r2=179858&rev=179858&view=diff
==============================================================================
(empty)
Propchange: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java Fri Jun 3 12:11:51 2005
@@ -83,7 +83,7 @@
Reader reader = NutchConf.get().getConfResourceAsReader(file);
if (reader == null) {
- LOG.severe("Can't find resource: " + file);
+ trie = new PrefixStringMatcher(new String[0]);
} else {
trie = readConfigurationFile(reader);
}
Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml Fri Jun 3 12:11:51 2005
@@ -70,7 +70,7 @@
<p>For example, the header file for an English translation is filed
as <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/include/en/header.xml"><tt>src/web/include/en/header.xml</tt></a>.</p>
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/include/en/header.xml"><tt>src/web/include/en/header.xml</tt></a>.</p>
</section>
@@ -110,7 +110,7 @@
<p>For example, the English language "about" page is filed
as <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/pages/en/about.xml"><tt>src/web/pages/en/about.xml</tt></a>.</p>
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/pages/en/about.xml"><tt>src/web/pages/en/about.xml</tt></a>.</p>
</section>
@@ -128,12 +128,12 @@
<p>These property files are filed as
<tt>src/web/locale/org/nutch/jsp/<i>page</i>_<i>language</i>.xml</tt>
where <i>page</i> is the name of the JSP page in <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/jsp/"><tt>src/web/jsp/</tt></a>
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/jsp/"><tt>src/web/jsp/</tt></a>
and <i>language</i> is the ISO 639 language code, as above.</p>
<p>For example, text for the English language search results page is filed
as <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/locale/org/nutch/jsp/search_en.properties"><tt>src/web/locale/org/nutch/jsp/search_en.properties</tt></a>.
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/locale/org/nutch/jsp/search_en.properties"><tt>src/web/locale/org/nutch/jsp/search_en.properties</tt></a>.
This contains something like:</p>
<pre>
Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml Fri Jun 3 12:11:51 2005
@@ -43,7 +43,7 @@
</projects>
<external-refs>
- <incubator href="http://incubator.apache.org/" />
+ <lucene href="http://lucene.apache.org/" />
<wiki href="http://wiki.apache.org/nutch/" />
<lucene href="http://jakarta.apache.org/lucene/" />
</external-refs>
Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml Fri Jun 3 12:11:51 2005
@@ -25,7 +25,7 @@
<title>Web Access (read-only)</title>
<p>
The source code can be browsed via the Web at
- <a href="http://svn.apache.org/viewcvs.cgi/incubator/nutch/">http://svn.apache.org/viewcvs.cgi/incubator/nutch/</a>.
+ <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/">http://svn.apache.org/viewcvs.cgi/lucene/nutch/</a>.
No SVN client software is required.
</p>
</section>
@@ -34,7 +34,7 @@
<title>Anonymous Access (read-only)</title>
<p>
The SVN URL for anonymous users is
- <a href="http://svn.apache.org/repos/asf/incubator/nutch/">http://svn.apache.org/repos/asf/incubator/nutch/</a>.
+ <a href="http://svn.apache.org/repos/asf/lucene/nutch/">http://svn.apache.org/repos/asf/lucene/nutch/</a>.
Instructions for anonymous SVN access are
<a href="http://www.apache.org/dev/version-control.html#anon-svn">here</a>.
</p>
@@ -44,7 +44,7 @@
<title>Committer Access (read-write)</title>
<p>
The SVN URL for committers is
- <a href="https://svn.apache.org/repos/asf/incubator/nutch/">https://svn.apache.org/repos/asf/incubator/nutch/</a>.
+ <a href="https://svn.apache.org/repos/asf/lucene/nutch/">https://svn.apache.org/repos/asf/lucene/nutch/</a>.
Instructions for committer SVN access are
<a href="http://www.apache.org/dev/version-control.html#https-svn">here</a>.
</p>
Modified: lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml Fri Jun 3 12:11:51 2005
@@ -66,14 +66,14 @@
<!-- project logo -->
<project-name>Nutch</project-name>
<project-description>Open Source Web Search Software</project-description>
- <project-url>http://incubator.apache.org/nutch/</project-url>
+ <project-url>http://lucene.apache.org/nutch/</project-url>
<project-logo>images/nutch-logo.gif</project-logo>
<!-- group logo -->
- <group-name>Incubator</group-name>
- <group-description>Apache Incubator</group-description>
- <group-url>http://incubator.apache.org/</group-url>
- <group-logo>http://incubator.apache.org/images/apache-incubator-logo.png</group-logo>
+ <group-name>Lucene</group-name>
+ <group-description>Apache Lucene</group-description>
+ <group-url>http://lucene.apache.org/</group-url>
+ <group-logo>http://lucene.apache.org/java/docs/images/lucene_green_150.gif</group-logo>
<!-- optional host logo (e.g. sourceforge logo)
default skin: renders it at the bottom-left corner -->
@@ -96,8 +96,8 @@
For some skins just set the attributes to blank.
-->
<trail>
- <link1 name="Incubator" href="http://incubator.apache.org/"/>
- <link2 name="Nutch" href="http://incubator.apache.org/nutch/"/>
+ <link1 name="Lucene" href="http://lucene.apache.org/"/>
+ <link2 name="Nutch" href="http://lucene.apache.org/nutch/"/>
<link3 name="" href=""/>
</trail>
Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java Fri Jun 3 12:11:51 2005
@@ -69,7 +69,8 @@
//STOPWORD
assertQueryEquals("the", "");
- assertQueryEquals("field:the -y", "-y");
+ assertQueryEquals("field:the -y", "field:the -y");
+ assertQueryEquals("\"the y\"", "\"the y\"");
assertQueryEquals("+the -y", "the -y");
//PHRASE
Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java Fri Jun 3 12:11:51 2005
@@ -19,6 +19,8 @@
import java.io.*;
import org.apache.nutch.io.*;
import org.apache.nutch.pagedb.*;
+import org.apache.nutch.protocol.ProtocolStatus;
+
import junit.framework.TestCase;
/** Unit tests for FetcherOutput. */
@@ -32,7 +34,7 @@
FetcherOutput o =
new FetcherOutput(new FetchListEntry(true, TestPage.getTestPage(),
anchors),
- TestMD5Hash.getTestHash(), FetcherOutput.SUCCESS);
+ TestMD5Hash.getTestHash(), ProtocolStatus.STATUS_SUCCESS);
TestWritable.testWritable(o);
Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java Fri Jun 3 12:11:51 2005
@@ -40,7 +40,7 @@
metaData.put("Language", "en/us");
metaData.put("Charset", "UTF-8");
- ParseData r = new ParseData(title, outlinks, metaData);
+ ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
TestWritable.testWritable(r);
}
Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java Fri Jun 3 12:11:51 2005
@@ -22,17 +22,17 @@
import org.apache.nutch.db.Page;
import org.apache.nutch.fetcher.FetcherOutput;
-import org.apache.nutch.io.ArrayFile;
import org.apache.nutch.io.MD5Hash;
import org.apache.nutch.fs.*;
import org.apache.nutch.segment.SegmentReader;
import org.apache.nutch.segment.SegmentWriter;
-import org.apache.nutch.util.*;
import org.apache.nutch.pagedb.FetchListEntry;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.ParseText;
import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolStatus;
import junit.framework.TestCase;
@@ -81,7 +81,7 @@
}
url += "/example.html";
FetchListEntry fle = new FetchListEntry(true, new Page(url, 1.0f), new String[] { "test" + rnd });
- FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url), FetcherOutput.SUCCESS);
+ FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url), ProtocolStatus.STATUS_SUCCESS);
StringBuffer content = new StringBuffer("<html><body><h1>Hello from Page " + i + "</h1>");
if (unique) {
content.append("<p>Created at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong() + "</p>");
@@ -95,7 +95,7 @@
meta.setProperty("Host", "http://localhost");
meta.setProperty("Connection", "Keep-alive, close");
Content co = new Content(url, "http://www.example.com", content.toString().getBytes("UTF-8"), "text/html", meta);
- ParseData pd = new ParseData("Hello from Page " + i, new Outlink[0], meta);
+ ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, "Hello from Page " + i, new Outlink[0], meta);
StringBuffer text = new StringBuffer("Hello from Page" + i);
if (unique) {
text.append("\nCreated at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong());