You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/06/03 21:11:56 UTC

svn commit: r179858 [3/3] - in /lucene/nutch/branches/mapred: ./ conf/ site/ src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/parse/ src/java/org/apache/nutch/plugin/ src/java/org/apache/nutch/protocol/ src/java/org/apache/nutch/searcher/ src/java/org/apache/nutch/tools/ src/java/org/apache/nutch/util/ src/plugin/ src/plugin/creativecommons/src/java/org/creativecommons/nutch/ src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/ src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/ src/plugin/parse-html/ src/plugin/parse-html/lib/ src/plugin/parse-html/src/java/org/apache/nutch/parse/html/ src/plugin/parse-html/src/test/org/apache/nutch/parse/html/ src/plugin/parse-js/ src/plugin/parse-js/src/ src/plugin/parse-js/src/java/ src/plugin/parse-js/src/java/org/ src/plugin/parse-js/src/java/org/apache/ src/plugin/parse-js/src/java/org/apache/nutch/ src/plugin/parse-js/src/java/org/apache/nutch/parse/ src/plugin/parse-js/src/java/org/apache/nutch/parse/js/ src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/ src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/ src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/ src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/ src/plugin/parse-text/src/java/org/apache/nutch/parse/text/ src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/ src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/ src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/ src/plugin/protocol-httpclient/ src/plugin/protocol-httpclient/lib/ src/plugin/protocol-httpclient/src/ src/plugin/protocol-httpclient/src/java/ src/plugin/protocol-httpclient/src/java/org/ src/plugin/protocol-httpclient/src/java/org/apache/ src/plugin/protocol-httpclient/src/java/org/apache/nutch/ 
src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/ src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/ src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/ src/site/src/documentation/ src/site/src/documentation/content/xdocs/ src/test/org/apache/nutch/analysis/ src/test/org/apache/nutch/fetcher/ src/test/org/apache/nutch/parse/ src/test/org/apache/nutch/tools/ src/test/org/apache/nutch/util/

Copied: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html (from r179837, lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html)
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html?p2=lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html&p1=lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html&r1=179837&r2=179858&rev=179858&view=diff
==============================================================================
    (empty)

Propchange: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java Fri Jun  3 12:11:51 2005
@@ -83,7 +83,7 @@
     Reader reader = NutchConf.get().getConfResourceAsReader(file);
 
     if (reader == null) {
-      LOG.severe("Can't find resource: " + file);
+      trie = new PrefixStringMatcher(new String[0]);
     } else {
       trie = readConfigurationFile(reader);
     }

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml Fri Jun  3 12:11:51 2005
@@ -70,7 +70,7 @@
 
 <p>For example, the header file for an English translation is filed
 as <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/include/en/header.xml"><tt>src/web/include/en/header.xml</tt></a>.</p>
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/include/en/header.xml"><tt>src/web/include/en/header.xml</tt></a>.</p>
 
 
 </section>
@@ -110,7 +110,7 @@
 
 <p>For example, the English language "about" page is filed
 as <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/pages/en/about.xml"><tt>src/web/pages/en/about.xml</tt></a>.</p>
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/pages/en/about.xml"><tt>src/web/pages/en/about.xml</tt></a>.</p>
 
 </section>
 
@@ -128,12 +128,12 @@
 <p>These property files are filed as
 <tt>src/web/locale/org/nutch/jsp/<i>page</i>_<i>language</i>.properties</tt>
 where <i>page</i> is the name of the JSP page in <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/jsp/"><tt>src/web/jsp/</tt></a>
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/jsp/"><tt>src/web/jsp/</tt></a>
 and <i>language</i> is the ISO 639 language code, as above.</p>
 
 <p>For example, text for the English language search results page is filed
 as <a
-href="http://svn.apache.org/repos/asf/incubator/nutch/trunk/src/web/locale/org/nutch/jsp/search_en.properties"><tt>src/web/locale/org/nutch/jsp/search_en.properties</tt></a>.
+href="http://svn.apache.org/repos/asf/lucene/nutch/trunk/src/web/locale/org/nutch/jsp/search_en.properties"><tt>src/web/locale/org/nutch/jsp/search_en.properties</tt></a>.
  This contains something like:</p>
 
 <pre>

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml Fri Jun  3 12:11:51 2005
@@ -43,7 +43,7 @@
   </projects>
 
   <external-refs>
-    <incubator href="http://incubator.apache.org/" />
+    <lucene    href="http://lucene.apache.org/" />
     <wiki      href="http://wiki.apache.org/nutch/" />
     <lucene    href="http://jakarta.apache.org/lucene/" />
   </external-refs>

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/version_control.xml Fri Jun  3 12:11:51 2005
@@ -25,7 +25,7 @@
       <title>Web Access (read-only)</title>
       <p>
         The source code can be browsed via the Web at 
-        <a href="http://svn.apache.org/viewcvs.cgi/incubator/nutch/">http://svn.apache.org/viewcvs.cgi/incubator/nutch/</a>.
+        <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/">http://svn.apache.org/viewcvs.cgi/lucene/nutch/</a>.
         No SVN client software is required.
       </p>
     </section>
@@ -34,7 +34,7 @@
       <title>Anonymous Access (read-only)</title>
       <p>
         The SVN URL for anonymous users is 
-        <a href="http://svn.apache.org/repos/asf/incubator/nutch/">http://svn.apache.org/repos/asf/incubator/nutch/</a>.
+        <a href="http://svn.apache.org/repos/asf/lucene/nutch/">http://svn.apache.org/repos/asf/lucene/nutch/</a>.
         Instructions for anonymous SVN access are 
         <a href="http://www.apache.org/dev/version-control.html#anon-svn">here</a>.
       </p>
@@ -44,7 +44,7 @@
       <title>Committer Access (read-write)</title>
       <p>
         The SVN URL for committers is 
-        <a href="https://svn.apache.org/repos/asf/incubator/nutch/">https://svn.apache.org/repos/asf/incubator/nutch/</a>.
+        <a href="https://svn.apache.org/repos/asf/lucene/nutch/">https://svn.apache.org/repos/asf/lucene/nutch/</a>.
         Instructions for committer SVN access are 
         <a href="http://www.apache.org/dev/version-control.html#https-svn">here</a>.
       </p>

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/skinconf.xml Fri Jun  3 12:11:51 2005
@@ -66,14 +66,14 @@
   <!-- project logo -->
   <project-name>Nutch</project-name>
   <project-description>Open Source Web Search Software</project-description>
-  <project-url>http://incubator.apache.org/nutch/</project-url>
+  <project-url>http://lucene.apache.org/nutch/</project-url>
   <project-logo>images/nutch-logo.gif</project-logo>
 
   <!-- group logo -->
-  <group-name>Incubator</group-name>
-  <group-description>Apache Incubator</group-description>
-  <group-url>http://incubator.apache.org/</group-url>
-  <group-logo>http://incubator.apache.org/images/apache-incubator-logo.png</group-logo>
+  <group-name>Lucene</group-name>
+  <group-description>Apache Lucene</group-description>
+  <group-url>http://lucene.apache.org/</group-url>
+  <group-logo>http://lucene.apache.org/java/docs/images/lucene_green_150.gif</group-logo>
 
   <!-- optional host logo (e.g. sourceforge logo)
        default skin: renders it at the bottom-left corner -->
@@ -96,8 +96,8 @@
     For some skins just set the attributes to blank.
   -->
   <trail>
-    <link1 name="Incubator" href="http://incubator.apache.org/"/>
-    <link2 name="Nutch" href="http://incubator.apache.org/nutch/"/>
+    <link1 name="Lucene" href="http://lucene.apache.org/"/>
+    <link2 name="Nutch" href="http://lucene.apache.org/nutch/"/>
     <link3 name="" href=""/>
   </trail>
 

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/analysis/TestQueryParser.java Fri Jun  3 12:11:51 2005
@@ -69,7 +69,8 @@
 
     //STOPWORD
     assertQueryEquals("the", "");
-    assertQueryEquals("field:the -y", "-y");
+    assertQueryEquals("field:the -y", "field:the -y");
+    assertQueryEquals("\"the y\"", "\"the y\"");
     assertQueryEquals("+the -y", "the -y");
 
     //PHRASE

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java Fri Jun  3 12:11:51 2005
@@ -19,6 +19,8 @@
 import java.io.*;
 import org.apache.nutch.io.*;
 import org.apache.nutch.pagedb.*;
+import org.apache.nutch.protocol.ProtocolStatus;
+
 import junit.framework.TestCase;
 
 /** Unit tests for FetcherOutput. */
@@ -32,7 +34,7 @@
     FetcherOutput o =
       new FetcherOutput(new FetchListEntry(true, TestPage.getTestPage(),
                                            anchors),
-                        TestMD5Hash.getTestHash(), FetcherOutput.SUCCESS);
+                        TestMD5Hash.getTestHash(), ProtocolStatus.STATUS_SUCCESS);
                         
     TestWritable.testWritable(o);
 

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/parse/TestParseData.java Fri Jun  3 12:11:51 2005
@@ -40,7 +40,7 @@
     metaData.put("Language", "en/us");
     metaData.put("Charset", "UTF-8");
 
-    ParseData r = new ParseData(title, outlinks, metaData);
+    ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
                         
     TestWritable.testWritable(r);
   }

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java?rev=179858&r1=179857&r2=179858&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java Fri Jun  3 12:11:51 2005
@@ -22,17 +22,17 @@
 
 import org.apache.nutch.db.Page;
 import org.apache.nutch.fetcher.FetcherOutput;
-import org.apache.nutch.io.ArrayFile;
 import org.apache.nutch.io.MD5Hash;
 import org.apache.nutch.fs.*;
 import org.apache.nutch.segment.SegmentReader;
 import org.apache.nutch.segment.SegmentWriter;
-import org.apache.nutch.util.*;
 import org.apache.nutch.pagedb.FetchListEntry;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.parse.ParseText;
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.protocol.ProtocolStatus;
 
 import junit.framework.TestCase;
 
@@ -81,7 +81,7 @@
       }
       url += "/example.html";
       FetchListEntry fle = new FetchListEntry(true, new Page(url, 1.0f), new String[] { "test" + rnd });
-      FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url), FetcherOutput.SUCCESS);
+      FetcherOutput fo = new FetcherOutput(fle, MD5Hash.digest(url), ProtocolStatus.STATUS_SUCCESS);
       StringBuffer content = new StringBuffer("<html><body><h1>Hello from Page " + i + "</h1>");
       if (unique) {
         content.append("<p>Created at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong() + "</p>");
@@ -95,7 +95,7 @@
       meta.setProperty("Host", "http://localhost");
       meta.setProperty("Connection", "Keep-alive, close");
       Content co = new Content(url, "http://www.example.com", content.toString().getBytes("UTF-8"), "text/html", meta);
-      ParseData pd = new ParseData("Hello from Page " + i, new Outlink[0], meta);
+      ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, "Hello from Page " + i, new Outlink[0], meta);
       StringBuffer text = new StringBuffer("Hello from Page" + i);
       if (unique) {
         text.append("\nCreated at epoch time: " + System.currentTimeMillis() + ", " + r.nextLong());