You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/09/08 21:45:01 UTC

svn commit: r279605 [4/4] - in /lucene/nutch/branches/mapred: ./ bin/ conf/ docs/ca/ docs/de/ docs/en/ docs/es/ docs/fi/ docs/fr/ docs/hu/ docs/jp/ docs/ms/ docs/nl/ docs/pl/ docs/pt/ docs/sv/ docs/th/ docs/zh/ lib/ site/ src/java/org/apache/nutch/anal...

Modified: lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml Thu Sep  8 12:42:44 2005
@@ -5,10 +5,6 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-   <extension-point
-      id="org.apache.nutch.searcher.QueryFilter"
-      name="Nutch Query Filter"/>
-
    <runtime>
       <library name="query-site.jar">
          <export name="*"/>

Modified: lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml Thu Sep  8 12:42:44 2005
@@ -5,9 +5,7 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-   <extension-point
-      id="org.apache.nutch.searcher.QueryFilter"
-      name="Nutch Query Filter"/>
+
 
    <runtime>
       <library name="query-url.jar">

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml Thu Sep  8 12:42:44 2005
@@ -5,9 +5,7 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-   <extension-point
-      id="org.apache.nutch.net.URLFilter"
-      name="Nutch URL Filter"/>
+
 
    <runtime>
       <library name="urlfilter-prefix.jar">

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java Thu Sep  8 12:42:44 2005
@@ -35,7 +35,6 @@
 
 import java.util.List;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.logging.Logger;
 
 /**
@@ -56,10 +55,10 @@
   static {
     String pluginName = "urlfilter-prefix";
     Extension[] extensions = PluginRepository.getInstance()
-      .getExtensionPoint(URLFilter.class.getName()).getExtentens();
+      .getExtensionPoint(URLFilter.class.getName()).getExtensions();
     for (int i=0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      if (extension.getDiscriptor().getPluginId().equals(pluginName)) {
+      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
         attributeFile = extension.getAttribute("file");
         break;
       }

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml Thu Sep  8 12:42:44 2005
@@ -5,10 +5,6 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-   <extension-point
-      id="org.apache.nutch.net.URLFilter"
-      name="Nutch URL Filter"/>
-
    <runtime>
       <library name="urlfilter-regex.jar">
          <export name="*"/>

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java Thu Sep  8 12:42:44 2005
@@ -36,7 +36,6 @@
 import org.apache.oro.text.regex.Perl5Compiler;
 import org.apache.oro.text.regex.Perl5Matcher;
 import org.apache.oro.text.regex.Perl5Pattern;
-import org.apache.oro.text.regex.Pattern;
 import org.apache.oro.text.regex.PatternMatcher;
 import org.apache.oro.text.regex.MalformedPatternException;
 
@@ -63,10 +62,10 @@
   static {
     String pluginName = "urlfilter-regex";
     Extension[] extensions = PluginRepository.getInstance()
-      .getExtensionPoint(URLFilter.class.getName()).getExtentens();
+      .getExtensionPoint(URLFilter.class.getName()).getExtensions();
     for (int i=0; i < extensions.length; i++) {
       Extension extension = extensions[i];
-      if (extension.getDiscriptor().getPluginId().equals(pluginName)) {
+      if (extension.getDescriptor().getPluginId().equals(pluginName)) {
         attributeFile = extension.getAttribute("file");
         break;
       }

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/bot.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/bot.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/bot.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/bot.xml Thu Sep  8 12:42:44 2005
@@ -21,7 +21,7 @@
 us.  Our software may be run by anyone.  However, we'd still like to
 hear about any bad behavior.  If possible, please include the name of
 the domain and some representative log entries.  We can be reached at
-<a href="mailto:agent@nutch.org"> agent@nutch.org</a>.</p>
+<a href="mailto:nutch-agent@lucene.apache.org">nutch-agent@lucene.apache.org</a>.</p>
 
 <p> Our software obeys the robots.txt exclusion standard, described at
 <a href="http://www.robotstxt.org/wc/exclusion.html#robotstxt">
@@ -52,7 +52,7 @@
 <section><title>Contact us</title>
 
 <p>If your site has problems or questions about the Nutch crawler, please
-send an email to the <a href="mailto:agent@nutch.org">Nutch agent
+send an email to the <a href="mailto:nutch-agent@lucene.apache.org">Nutch agent
 mailing list</a>.</p>
 
 </section>

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/credits.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/credits.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/credits.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/credits.xml Thu Sep  8 12:42:44 2005
@@ -16,7 +16,9 @@
 <ul>
   <li>Andrzej Bialecki</li>
   <li>Mike Cafarella</li>
+  <li><a href="http://www.frutch.org/">Jérôme Charron</a></li>
   <li><a href="http://www.nutch.org/blog/cutting.html">Doug Cutting</a></li>
+  <li>Piotr Kosiorowski</li>
   <li>Sami Siren</li>
   <li>John Xing</li>
 </ul>

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/i18n.xml Thu Sep  8 12:42:44 2005
@@ -41,7 +41,7 @@
 </ol>
 
 <p>If you'd like to provide a translation, simply post translations of
-these five files to <a href="mailto:dev@nutch.org">dev@nutch.org</a>
+these five files to <a href="mailto:nutch-dev@lucene.apache.org">nutch-dev@lucene.apache.org</a>
 as an attachment.</p>
 
 </section>
@@ -163,7 +163,7 @@
  href="http://java.sun.com/j2se/downloads.html">Java</a>, <a
  href="http://ant.apache.org/">Ant</a> and Nutch installed.  To
  install Nutch, either download and unpack the latest <a
- href="http://www.nutch.org/release/nightly/">release</a>, or check it
+ href="http://lucene.apache.org/nutch/release/nightly/">release</a>, or check it
  out from <a
  href="version_control.html">Subversion</a>.</p>
 
@@ -188,7 +188,7 @@
 href="http://jakarta.apache.org/tomcat/">Tomcat</a> installed.</p>
 
 <p>An index is also required.  You can collect your own by working
-through the <a href="http://www.nutch.org/tutorial.html">tutorial</a>.
+through the <a href="http://lucene.apache.org/nutch/tutorial.html">tutorial</a>.
 Once you have an index, follow the steps outlined at the end of the
 tutorial for searching.</p>
 

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/index.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/index.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/index.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/index.xml Thu Sep  8 12:42:44 2005
@@ -15,6 +15,15 @@
       <title>News</title>
 
       <section>
+      <title>17 August 2005: Nutch 0.7 Released</title>
+      <p>This is the first Nutch release as an Apache Lucene sub-project. See 
+      <a href="http://svn.apache.org/viewcvs.cgi/lucene/nutch/trunk/CHANGES.txt?rev=233150">
+      CHANGES.txt</a> for details. The release is available 
+      <a href="http://lucene.apache.org/nutch/release/">here</a>.</p>
+      </section>
+
+
+      <section>
       <title>June 2005: Nutch graduates from Incubator</title>
 
       <p>Nutch has now graduated from the Apache incubator, and is now

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/tutorial.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/tutorial.xml?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/tutorial.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/tutorial.xml Thu Sep  8 12:42:44 2005
@@ -34,7 +34,7 @@
 
 <p>First, you need to get a copy of the Nutch code.  You can download
 a release from <a
-href="http://www.nutch.org/release/">http://www.nutch.org/release/</a>.
+href="http://lucene.apache.org/nutch/release/">http://lucene.apache.org/nutch/release/</a>.
 Unpack the release and connect to its top-level directory.  Or, check
 out the latest source code from <a
 href="version_control.html">subversion</a> and build it
@@ -67,23 +67,23 @@
 <ol>
 
 <li>Create a flat file of root urls.  For example, to crawl the
-<code>nutch.org</code> site you might start with a file named
+<code>nutch</code> site you might start with a file named
 <code>urls</code> containing just the Nutch home page.  All other
 Nutch pages should be reachable from this page.  The <code>urls</code>
 file would thus look like:
 <source>
-http://www.nutch.org/
+http://lucene.apache.org/nutch/
 </source>
 </li>
 
 <li>Edit the file <code>conf/crawl-urlfilter.txt</code> and replace
 <code>MY.DOMAIN.NAME</code> with the name of the domain you wish to
 crawl.  For example, if you wished to limit the crawl to the
-<code>nutch.org</code> domain, the line should read:
+<code>apache.org</code> domain, the line should read:
 <source>
-+^http://([a-z0-9]*\.)*nutch.org/
++^http://([a-z0-9]*\.)*apache.org/
 </source>
-This will include any url in the domain <code>nutch.org</code>.
+This will include any url in the domain <code>apache.org</code>.
 </li>
 
 </ol>

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java Thu Sep  8 12:42:44 2005
@@ -88,17 +88,21 @@
         BitSet bits = new BitSet(length);
         for (int j = 0; j < splits.length; j++) {
           RecordReader reader = format.getRecordReader(fs, splits[j], job);
-          int count = 0;
-          while (reader.next(key, value)) {
-//             if (bits.get(key.get())) {
-//               LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
-//               LOG.info("@"+reader.getPos());
-//             }
-            assertFalse("Key in multiple partitions.", bits.get(key.get()));
-            bits.set(key.get());
-            count++;
+          try {
+            int count = 0;
+            while (reader.next(key, value)) {
+              // if (bits.get(key.get())) {
+              // LOG.info("splits["+j+"]="+splits[j]+" : " + key.get());
+              // LOG.info("@"+reader.getPos());
+              // }
+              assertFalse("Key in multiple partitions.", bits.get(key.get()));
+              bits.set(key.get());
+              count++;
+            }
+            //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
+          } finally {
+            reader.close();
           }
-          //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
         }
         assertEquals("Some keys in no partition.", length, bits.cardinality());
       }

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/mapred/TestTextInputFormat.java Thu Sep  8 12:42:44 2005
@@ -82,18 +82,22 @@
         BitSet bits = new BitSet(length);
         for (int j = 0; j < splits.length; j++) {
           RecordReader reader = format.getRecordReader(fs, splits[j], job);
-          int count = 0;
-          while (reader.next(key, value)) {
-            int v = Integer.parseInt(value.toString());
-//             if (bits.get(v)) {
-//               LOG.info("splits["+j+"]="+splits[j]+" : " + v);
-//               LOG.info("@"+reader.getPos());
-//             }
-            assertFalse("Key in multiple partitions.", bits.get(v));
-            bits.set(v);
-            count++;
+          try {
+            int count = 0;
+            while (reader.next(key, value)) {
+              int v = Integer.parseInt(value.toString());
+              //             if (bits.get(v)) {
+              //               LOG.info("splits["+j+"]="+splits[j]+" : " + v);
+              //               LOG.info("@"+reader.getPos());
+              //             }
+              assertFalse("Key in multiple partitions.", bits.get(v));
+              bits.set(v);
+              count++;
+            }
+            //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
+          } finally {
+            reader.close();
           }
-          //LOG.info("splits["+j+"]="+splits[j]+" count=" + count);
         }
         assertEquals("Some keys in no partition.", length, bits.cardinality());
       }

Modified: lucene/nutch/branches/mapred/src/test/org/apache/nutch/plugin/TestPluginSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original)
+++ lucene/nutch/branches/mapred/src/test/org/apache/nutch/plugin/TestPluginSystem.java Thu Sep  8 12:42:44 2005
@@ -95,7 +95,7 @@
                 .getExtensionPoint(xpId);
         assertEquals(extensionPoint, null);
         Extension[] extension1 = PluginRepository.getInstance()
-                .getExtensionPoint(getGetExtensionId()).getExtentens();
+                .getExtensionPoint(getGetExtensionId()).getExtensions();
         assertEquals(extension1.length, fPluginCount);
         for (int i = 0; i < extension1.length; i++) {
             Extension extension2 = extension1[i];
@@ -109,7 +109,7 @@
      */
     public void testGetExtensionInstances() throws PluginRuntimeException {
         Extension[] extensions = PluginRepository.getInstance()
-                .getExtensionPoint(getGetExtensionId()).getExtentens();
+                .getExtensionPoint(getGetExtensionId()).getExtensions();
         assertEquals(extensions.length, fPluginCount);
         for (int i = 0; i < extensions.length; i++) {
             Extension extension = extensions[i];

Modified: lucene/nutch/branches/mapred/src/web/jsp/search.jsp
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/web/jsp/search.jsp?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/web/jsp/search.jsp (original)
+++ lucene/nutch/branches/mapred/src/web/jsp/search.jsp Thu Sep  8 12:42:44 2005
@@ -123,9 +123,14 @@
 <link rel="alternate" type="application/rss+xml" title="RSS" href="<%=rss%>"/>
 <jsp:include page="include/style.html"/>
 <base href="<%= base  + "/" + language %>/">
+<script type="text/javascript">
+<!--
+function queryfocus() { document.search.query.focus(); }
+// -->
+</script>
 </head>
 
-<body>
+<body onLoad="queryfocus();">
 
 <jsp:include page="<%= language + "/include/header.html"%>"/>
 

Modified: lucene/nutch/branches/mapred/src/web/locale/org/nutch/jsp/search_de.properties
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/web/locale/org/nutch/jsp/search_de.properties?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/web/locale/org/nutch/jsp/search_de.properties (original)
+++ lucene/nutch/branches/mapred/src/web/locale/org/nutch/jsp/search_de.properties Thu Sep  8 12:42:44 2005
@@ -1,9 +1,11 @@
-title = Suchresultate
+title = Suchergebnisse
 search = Suche
 hits = Treffer <b>{0}-{1}</b> (von insgesammt {2} gefundenen Seiten):
-cached = zwischengespeichert
-explain = erkl&auml;ung
-anchors = referenzen
+cached = Im Cache
+explain = Erkl&auml;rung
+anchors = Referenzen
 next = Weiter
-clustering = clustering
-viewAsText = View as Plain Text
+clustering = Clustern
+viewAsText = HTML-Version
+moreFrom = Mehr von
+showAllHits = Alle Treffer anzeigen

Modified: lucene/nutch/branches/mapred/src/web/style/nutch-page.xsl
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/web/style/nutch-page.xsl?rev=279605&r1=279604&r2=279605&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/web/style/nutch-page.xsl (original)
+++ lucene/nutch/branches/mapred/src/web/style/nutch-page.xsl Thu Sep  8 12:42:44 2005
@@ -17,8 +17,15 @@
 <!-- specify icon file -->
       <link rel="icon" href="../img/favicon.ico" type="image/x-icon"/>
       <link rel="shortcut icon" href="../img/favicon.ico" type="image/x-icon"/>
+
+      <script type="text/javascript">
+      <xsl:comment>
+function queryfocus() { document.search.query.focus(); }
+<xsl:text>// </xsl:text>
+</xsl:comment>
+      </script>
       </head>
-      <body>
+      <body onLoad="queryfocus();">
 <!-- insert localized header -->
         <xsl:copy-of select="document('include/header.html')"/>
         <table width="635" border="0" cellpadding="0" cellspacing="0">