You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2005/10/18 23:00:19 UTC

svn commit: r326238 [2/2] - in /lucene/nutch/branches/mapred: ./ conf/ site/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/db/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/parse/ src/java/org/apache/nutch/plugin/ src/java/o...

Modified: lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml Tue Oct 18 13:59:40 2005
@@ -12,6 +12,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.indexer.basic"
               name="Nutch Basic Indexing Filter"
               point="org.apache.nutch.indexer.IndexingFilter">

Modified: lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml Tue Oct 18 13:59:40 2005
@@ -12,6 +12,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.indexer.more"
               name="Nutch More Indexing Filter"
               point="org.apache.nutch.indexer.IndexingFilter">

Modified: lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,13 +5,15 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
     <runtime>
       <library name="language-identifier.jar">
          <export name="*"/>
       </library>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.analysis.lang.LanguageParser"
               name="Nutch language Parser"

Modified: lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml Tue Oct 18 13:59:40 2005
@@ -18,6 +18,10 @@
 
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <!-- attribute "point" is the plugin interface class -->
    <!-- seems kinda redundant to have to define the point here too -->   
    <extension id="org.apache.nutch.ontology.OntologyImpl"

Modified: lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,13 +5,15 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="parse-ext.jar">
          <export name="*"/>
       </library>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.parse.ext"
               name="ExtParse"

Modified: lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java Tue Oct 18 13:59:40 2005
@@ -21,9 +21,8 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
 
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 
 import junit.framework.TestCase;
@@ -46,8 +45,7 @@
 public class TestExtParser extends TestCase {
   private File tempFile = null;
   private String urlString = null;
-  private Content content = null;;
-  private Parser parser = null;;
+  private Content content = null;
   private Parse parse = null;
 
   private String expectedText = "nutch rocks nutch rocks nutch rocks";
@@ -107,15 +105,13 @@
       // check external parser that does 'cat'
       contentType = "application/vnd.nutch.example.cat";
       content.setContentType(contentType);
-      parser = ParserFactory.getParser(contentType, urlString);
-      parse = parser.getParse(content);
+      parse = ParseUtil.parseByParserId("parse-ext", content);
       assertEquals(expectedText,parse.getText());
 
       // check external parser that does 'md5sum'
       contentType = "application/vnd.nutch.example.md5sum";
       content.setContentType(contentType);
-      parser = ParserFactory.getParser(contentType, urlString);
-      parse = parser.getParse(content);
+      parse = ParseUtil.parseByParserId("parse-ext", content);
       assertTrue(parse.getText().startsWith(expectedMD5sum));
     }
   }

Modified: lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,8 +5,6 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="parse-html.jar">
          <export name="*"/>
@@ -14,6 +12,10 @@
       <library name="nekohtml-0.9.4.jar"/>
       <library name="tagsoup-1.0rc3.jar"/>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.parse.html"
               name="HtmlParse"

Modified: lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml Tue Oct 18 13:59:40 2005
@@ -11,6 +11,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.parse.js"
               name="JS Parser"
               point="org.apache.nutch.parse.Parser">

Modified: lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml Tue Oct 18 13:59:40 2005
@@ -12,6 +12,10 @@
       <library name="jid3lib-0.5.1.jar"/>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension point="org.apache.nutch.parse.Parser"
               id="org.apache.nutch.parse.mp3"
               name="MP3Parse">

Modified: lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java Tue Oct 18 13:59:40 2005
@@ -19,7 +19,7 @@
 import junit.framework.TestCase;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.parse.Parser;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParserFactory;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
@@ -60,15 +60,13 @@
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
     urlString = "file:" + sampleDir + fileSeparator + id3v2;
     protocol = ProtocolFactory.getProtocol(urlString);
     content = protocol.getContent(urlString);
 
-    parser = ParserFactory.getParser(content.getContentType(), urlString);
-    parse = parser.getParse(content);
+    parse = ParseUtil.parseByParserId("parse-mp3",content);
     Properties metadata = parse.getData().getMetadata();
     assertEquals("postgresql comment id3v2", metadata.getProperty("COMM-Text"));
     assertEquals("postgresql composer id3v2", metadata.getProperty("TCOM-Text"));

Modified: lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml Tue Oct 18 13:59:40 2005
@@ -13,6 +13,7 @@
 
    <requires>
       <import plugin="lib-jakarta-poi"/>
+      <import plugin="nutch-extensionpoints"/>
    </requires>
 
    <extension id="net.nutch.parse.mspowerpoint"

Modified: lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java Tue Oct 18 13:59:40 2005
@@ -29,8 +29,7 @@
 
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.Parser;
-import org.apache.nutch.parse.ParserFactory;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolFactory;
@@ -123,9 +122,7 @@
    */
   public void testContent() throws Exception {
 
-    Parser parser = ParserFactory.getParser(this.content.getContentType(),
-        this.urlString);
-    Parse parse = parser.getParse(this.content);
+    Parse parse = ParseUtil.parseByParserId("parse-mspowerpoint",this.content);
 
     ParseData data = parse.getData();
     String text = parse.getText();
@@ -162,10 +159,8 @@
    */
   public void testMeta() throws Exception {
 
-    Parser parser = ParserFactory.getParser(this.content.getContentType(),
-        this.urlString);
-    Parse parse = parser.getParse(this.content);
-
+    Parse parse = ParseUtil.parseByParserId("parse-mspowerpoint",content);
+    
     ParseData data = parse.getData();
 
     final FileExtensionFilter titleFilter = new FileExtensionFilter(

Modified: lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml Tue Oct 18 13:59:40 2005
@@ -13,6 +13,10 @@
       <library name="poi-scratchpad-2.1-20040508.jar"/>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.parse.msword"
               name="MSWordParse"
               point="org.apache.nutch.parse.Parser">

Modified: lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java Tue Oct 18 13:59:40 2005
@@ -21,9 +21,8 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
 
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 
 import junit.framework.TestCase;
@@ -57,7 +56,6 @@
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
     for (int i=0; i<sampleFiles.length; i++) {
@@ -65,9 +63,7 @@
 
       protocol = ProtocolFactory.getProtocol(urlString);
       content = protocol.getProtocolOutput(urlString).getContent();
-
-      parser = ParserFactory.getParser(content.getContentType(), urlString);
-      parse = parser.getParse(content);
+      parse = ParseUtil.parseByParserId("parse-msword",content);
 
       assertTrue(parse.getText().startsWith(expectedText));
     }

Modified: lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml Tue Oct 18 13:59:40 2005
@@ -10,9 +10,13 @@
       <library name="parse-pdf.jar">
          <export name="*"/>
       </library>
-      <library name="PDFBox-0.7.0.jar"/>
+      <library name="PDFBox-0.7.2-log4j.jar"/>
       <library name="log4j-1.2.9.jar"/>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.parse.pdf"
               name="PdfParse"

Modified: lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java Tue Oct 18 13:59:40 2005
@@ -21,9 +21,8 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
 
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 
 import junit.framework.TestCase;
@@ -57,7 +56,6 @@
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
     for (int i=0; i<sampleFiles.length; i++) {
@@ -65,9 +63,7 @@
 
       protocol = ProtocolFactory.getProtocol(urlString);
       content = protocol.getProtocolOutput(urlString).getContent();
-
-      parser = ParserFactory.getParser(content.getContentType(), urlString);
-      parse = parser.getParse(content);
+      parse = ParseUtil.parseByParserId("parse-pdf",content);
 
       int index = parse.getText().indexOf(expectedText);
       assertTrue(index > 0);

Modified: lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml Tue Oct 18 13:59:40 2005
@@ -20,8 +20,11 @@
       <library name="xercesImpl.jar"/>
       <library name="xml-apis.jar"/>
       <library name="xml-rpc-1.2.jar"/>
-
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.parse.rss"
               name="RssParse"

Modified: lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java Tue Oct 18 13:59:40 2005
@@ -157,11 +157,13 @@
                 if (r.getLink() != null) {
                     try {
                         // get the outlink
-                        theOutlinks.add(new Outlink(r.getLink(), r
-                                .getDescription()));
+			if (r.getDescription()!= null ) {
+			    theOutlinks.add(new Outlink(r.getLink(), r.getDescription()));
+			} else {
+			    theOutlinks.add(new Outlink(r.getLink(), ""));
+			}
                     } catch (MalformedURLException e) {
-                        LOG
-                                .info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
+                        LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
                                         + r.getLink()
                                         + ": Attempting to continue processing outlinks");
                         e.printStackTrace();
@@ -185,12 +187,13 @@
 
                     if (whichLink != null) {
                         try {
-                            theOutlinks.add(new Outlink(whichLink, theRSSItem
-                                    .getDescription()));
-
+			    if (theRSSItem.getDescription()!=null) {
+				theOutlinks.add(new Outlink(whichLink, theRSSItem.getDescription()));
+			    } else {
+				theOutlinks.add(new Outlink(whichLink, ""));
+			    }
                         } catch (MalformedURLException e) {
-                            LOG
-                                    .info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
+                            LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: "
                                             + whichLink
                                             + ": Attempting to continue processing outlinks");
                             e.printStackTrace();
@@ -206,23 +209,18 @@
             LOG.fine("nutch:parse-rss:getParse:contentTitle=" + contentTitle);
 
         } else {
-            LOG
-                    .fine("nutch:parse-rss:Error:getParse: No RSS Channels recorded!");
+            LOG.fine("nutch:parse-rss:Error:getParse: No RSS Channels recorded!");
         }
 
         // format the outlinks
+        Outlink[] outlinks = (Outlink[]) theOutlinks.toArray(new Outlink[theOutlinks.size()]);
 
-        Outlink[] outlinks = (Outlink[]) theOutlinks
-                .toArray(new Outlink[theOutlinks.size()]);
-
-        LOG.fine("nutch:parse-rss:getParse:found " + outlinks.length
-                + " outlinks");
+        LOG.fine("nutch:parse-rss:getParse:found " + outlinks.length + " outlinks");
         // LOG.info("Outlinks: "+outlinks);
 
         ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
                 contentTitle.toString(), outlinks, content.getMetadata());
         return new ParseImpl(indexText.toString(), parseData);
-
     }
 
 }

Modified: lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java Tue Oct 18 13:59:40 2005
@@ -21,9 +21,8 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
 
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.Outlink;
@@ -75,7 +74,6 @@
         String urlString;
         Protocol protocol;
         Content content;
-        Parser parser;
         Parse parse;
 
         for (int i = 0; i < sampleFiles.length; i++) {
@@ -83,10 +81,7 @@
 
             protocol = ProtocolFactory.getProtocol(urlString);
             content = protocol.getProtocolOutput(urlString).getContent();
-
-            parser = ParserFactory.getParser(content.getContentType(),
-                    urlString);
-            parse = parser.getParse(content);
+            parse = ParseUtil.parseByParserId("parse-rss",content);
 
             //check that there are 3 outlinks:
             //http://test.channel.com

Modified: lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml Tue Oct 18 13:59:40 2005
@@ -12,6 +12,10 @@
       <library name="rtf-parser.jar"/>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension point="org.apache.nutch.parse.Parser"
               id="org.apache.nutch.parse.rtf"
               name="RTFParse">

Modified: lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java Tue Oct 18 13:59:40 2005
@@ -18,8 +18,8 @@
 
 import junit.framework.TestCase;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.ParserFactory;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
@@ -58,15 +58,13 @@
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
     urlString = "file:" + sampleDir + fileSeparator + rtfFile;
     protocol = ProtocolFactory.getProtocol(urlString);
     content = protocol.getContent(urlString);
 
-    parser = ParserFactory.getParser(content.getContentType(), urlString);
-    parse = parser.getParse(content);
+    parse = ParseUtil.parseByParserId("parse-rtf",content);
     String text = parse.getText();
     assertEquals("The quick brown fox jumps over the lazy dog", text.trim());
 

Modified: lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml Tue Oct 18 13:59:40 2005
@@ -12,6 +12,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.parse.text"
               name="TextParse"
               point="org.apache.nutch.parse.Parser">

Modified: lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml Tue Oct 18 13:59:40 2005
@@ -11,6 +11,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.parse.zip"
               name="ZipParser" 
               point="org.apache.nutch.parse.Parser">

Modified: lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java Tue Oct 18 13:59:40 2005
@@ -28,9 +28,8 @@
 
 // Nutch imports
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.Parser;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParserFactory;
 import org.apache.nutch.parse.ParseException;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.protocol.Content;
@@ -89,8 +88,7 @@
             metadata.setProperty("Content-Length", Long.toString(entry.getSize()));
             metadata.setProperty("Content-Type", contentType);
             Content content = new Content(newurl, base, b, contentType, metadata);
-            Parser parser = ParserFactory.getParser(contentType, newurl);
-            Parse parse = parser.getParse(content);
+            Parse parse = ParseUtil.parse(content);
             ParseData theParseData = parse.getData();
             Outlink[] theOutlinks = theParseData.getOutlinks();
             

Modified: lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java (original)
+++ lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java Tue Oct 18 13:59:40 2005
@@ -21,9 +21,8 @@
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
 
-import org.apache.nutch.parse.ParserFactory;
-import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 
 import junit.framework.TestCase;
@@ -57,7 +56,6 @@
     String urlString;
     Protocol protocol;
     Content content;
-    Parser parser;
     Parse parse;
 
     for (int i = 0; i < sampleFiles.length; i++) {
@@ -65,9 +63,7 @@
 
       protocol = ProtocolFactory.getProtocol(urlString);
       content = protocol.getProtocolOutput(urlString).getContent();
-
-      parser = ParserFactory.getParser(content.getContentType(), urlString);
-      parse = parser.getParse(content);
+      parse = ParseUtil.parseByParserId("parse-zip",content);
       assertTrue(parse.getText().equals(expectedText));
     }
   }

Modified: lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml Tue Oct 18 13:59:40 2005
@@ -12,6 +12,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.protocol.file"
               name="FileProtocol"
               point="org.apache.nutch.protocol.Protocol">

Modified: lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,14 +5,16 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="protocol-ftp.jar">
          <export name="*"/>
       </library>
       <library name="commons-net-1.2.0-dev.jar"/>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.protocol.ftp"
               name="FtpProtocol"

Modified: lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml Tue Oct 18 13:59:40 2005
@@ -11,6 +11,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.protocol.http"
               name="HttpProtocol"
               point="org.apache.nutch.protocol.Protocol">

Modified: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml Tue Oct 18 13:59:40 2005
@@ -13,6 +13,10 @@
       <library name="commons-httpclient-3.0-rc2.jar" />
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.protocol.httpclient"
               name="HttpProtocol"
               point="org.apache.nutch.protocol.Protocol">

Modified: lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,13 +5,15 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="query-basic.jar">
          <export name="*"/>
       </library>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.searcher.basic"
               name="Nutch Basic Query Filter"

Modified: lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,13 +5,15 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="query-more.jar">
          <export name="*"/>
       </library>
    </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.searcher.more"
               name="Nutch More Query Filter"

Modified: lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml Tue Oct 18 13:59:40 2005
@@ -11,6 +11,10 @@
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
    <extension id="org.apache.nutch.searcher.site.SiteQueryFilter"
               name="Nutch Site Query Filter"
               point="org.apache.nutch.searcher.QueryFilter">

Modified: lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,14 +5,15 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="query-url.jar">
          <export name="*"/>
       </library>
    </runtime>
 
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
 
    <extension id="org.apache.nutch.searcher.url.URLQueryFilter"
               name="Nutch URL Query Filter"

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml Tue Oct 18 13:59:40 2005
@@ -5,15 +5,17 @@
    version="1.0.0"
    provider-name="nutch.org">
 
-
-
    <runtime>
       <library name="urlfilter-prefix.jar">
          <export name="*"/>
       </library>
    </runtime>
 
-   <extension id="org.apache.nutch.net.urlfiler"
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.net.urlfilter"
               name="Nutch Prefix URL Filter"
               point="org.apache.nutch.net.URLFilter">
       <implementation id="PrefixURLFilter"

Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml (original)
+++ lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml Tue Oct 18 13:59:40 2005
@@ -11,7 +11,11 @@
       </library>
    </runtime>
 
-   <extension id="org.apache.nutch.net.urlfiler"
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension id="org.apache.nutch.net.urlfilter"
               name="Nutch Regex URL Filter"
               point="org.apache.nutch.net.URLFilter">
       <implementation id="RegexURLFilter"

Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml?rev=326238&r1=326237&r2=326238&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml (original)
+++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml Tue Oct 18 13:59:40 2005
@@ -24,7 +24,7 @@
   </project>
 
   <docs label="Documentation">    
-    <faq         label="FAQ"              href="faq.html" />    
+    <faq         label="FAQ"              href="ext:faq" />    
     <wiki        label="Wiki"             href="ext:wiki" />    
     <tutorial    label="Tutorial"         href="tutorial.html" />
     <webmasters  label="Robot     "       href="bot.html" />
@@ -46,6 +46,7 @@
   <external-refs>
     <lucene    href="http://lucene.apache.org/java/" />
     <wiki      href="http://wiki.apache.org/nutch/" />
+    <faq       href="http://wiki.apache.org/nutch/FAQ" /> 
     <store     href="http://www.cafepress.com/nutch/" />
   </external-refs>