You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/10/09 15:54:28 UTC

svn commit: r823553 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/parse/ParseData.java src/java/org/apache/nutch/parse/ParseText.java src/java/org/apache/nutch/protocol/Content.java src/test/org/apache/nutch/util/TestNodeWalker.java

Author: ab
Date: Fri Oct  9 13:54:27 2009
New Revision: 823553

URL: http://svn.apache.org/viewvc?rev=823553&view=rev
Log:
NUTCH-754 Use GenericOptionsParser instead of FileSystem.parseArgs().

Modified:
    lucene/nutch/trunk/CHANGES.txt
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
    lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Oct  9 13:54:27 2009
@@ -20,6 +20,9 @@
 * NUTCH-757 - RequestUtils getBooleanParameter() always returns false
   (Niall Pemberton via ab)
 
+* NUTCH-754 - Use GenericOptionsParser instead of FileSystem.parseArgs() (Julien
+  Nioche via ab)
+
 Release 1.0 - 2009-03-23
 
  1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Fri Oct  9 13:54:27 2009
@@ -20,9 +20,12 @@
 import java.io.*;
 import java.util.*;
 
+import org.apache.commons.cli.Options;
 import org.apache.hadoop.io.*;
+import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FileSystem;
 
 import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.util.NutchConfiguration;
@@ -205,11 +208,18 @@
       return;
     }
 
+    Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    FileSystem fs = FileSystem.parseArgs(argv, 0, conf);
+    
+    GenericOptionsParser parser =
+      new GenericOptionsParser(conf, opts, argv);
+    
+    String[] remainingArgs = parser.getRemainingArgs();
+    FileSystem fs = FileSystem.get(conf);
+    
     try {
-      int recno = Integer.parseInt(argv[0]);
-      String segment = argv[1];
+      int recno = Integer.parseInt(remainingArgs[0]);
+      String segment = remainingArgs[1];
 
       Path file = new Path(segment, DIR_NAME);
       System.out.println("Reading from file: " + file);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java Fri Oct  9 13:54:27 2009
@@ -19,8 +19,10 @@
 
 import java.io.*;
 import org.apache.hadoop.io.*;
+import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.*;
+import org.apache.commons.cli.Options;
 import org.apache.nutch.util.NutchConfiguration;
 
 /* The text conversion of page's content, stored using gzip compression.
@@ -86,12 +88,18 @@
       System.out.println("usage:" + usage);
       return;
     }
-
+    Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    FileSystem fs = FileSystem.parseArgs(argv, 0, conf);
+    
+    GenericOptionsParser parser =
+      new GenericOptionsParser(conf, opts, argv);
+    
+    String[] remainingArgs = parser.getRemainingArgs();
+    
+    FileSystem fs = FileSystem.get(conf);
     try {
-      int recno = Integer.parseInt(argv[0]);
-      String segment = argv[1];
+      int recno = Integer.parseInt(remainingArgs[0]);
+      String segment = remainingArgs[1];
       String filename = new Path(segment, ParseText.DIR_NAME).toString();
 
       ParseText parseText = new ParseText();

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Fri Oct  9 13:54:27 2009
@@ -27,6 +27,7 @@
 import java.util.zip.InflaterInputStream;
 
 //Hadoop imports
+import org.apache.commons.cli.Options;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -35,6 +36,7 @@
 import org.apache.hadoop.io.UTF8;
 import org.apache.hadoop.io.VersionMismatchException;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.GenericOptionsParser;
 
 //Nutch imports
 import org.apache.nutch.metadata.Metadata;
@@ -255,11 +257,18 @@
       System.out.println("usage:" + usage);
       return;
     }
+    Options opts = new Options();
     Configuration conf = NutchConfiguration.create();
-    FileSystem fs = FileSystem.parseArgs(argv, 0, conf);
+    
+    GenericOptionsParser parser =
+      new GenericOptionsParser(conf, opts, argv);
+    
+    String[] remainingArgs = parser.getRemainingArgs();
+    FileSystem fs = FileSystem.get(conf);
+    
     try {
-      int recno = Integer.parseInt(argv[0]);
-      String segment = argv[1];
+      int recno = Integer.parseInt(remainingArgs[0]);
+      String segment = remainingArgs[1];
 
       Path file = new Path(segment, DIR_NAME);
       System.out.println("Reading from file: " + file);

Modified: lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java Fri Oct  9 13:54:27 2009
@@ -58,7 +58,10 @@
 
   public void testSkipChildren() {
     DOMParser parser= new DOMParser();
+    
     try {
+      parser.setFeature("http://xml.org/sax/features/validation", false);
+      parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
       parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
     } catch (Exception e) {
       e.printStackTrace();