You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain-text copy.
Posted to commits@nutch.apache.org by ab...@apache.org on 2009/10/09 15:54:28 UTC
svn commit: r823553 - in /lucene/nutch/trunk: CHANGES.txt
src/java/org/apache/nutch/parse/ParseData.java
src/java/org/apache/nutch/parse/ParseText.java
src/java/org/apache/nutch/protocol/Content.java
src/test/org/apache/nutch/util/TestNodeWalker.java
Author: ab
Date: Fri Oct 9 13:54:27 2009
New Revision: 823553
URL: http://svn.apache.org/viewvc?rev=823553&view=rev
Log:
NUTCH-754 Use GenericOptionsParser instead of FileSystem.parseArgs().
Modified:
lucene/nutch/trunk/CHANGES.txt
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java
lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java
Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Fri Oct 9 13:54:27 2009
@@ -20,6 +20,9 @@
* NUTCH-757 - RequestUtils getBooleanParameter() always returns false
(Niall Pemberton via ab)
+* NUTCH-754 - Use GenericOptionsParser instead of FileSystem.parseArgs() (Julien
+ Nioche via ab)
+
Release 1.0 - 2009-03-23
1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab)
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Fri Oct 9 13:54:27 2009
@@ -20,9 +20,12 @@
import java.io.*;
import java.util.*;
+import org.apache.commons.cli.Options;
import org.apache.hadoop.io.*;
+import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.util.NutchConfiguration;
@@ -205,11 +208,18 @@
return;
}
+ Options opts = new Options();
Configuration conf = NutchConfiguration.create();
- FileSystem fs = FileSystem.parseArgs(argv, 0, conf);
+
+ GenericOptionsParser parser =
+ new GenericOptionsParser(conf, opts, argv);
+
+ String[] remainingArgs = parser.getRemainingArgs();
+ FileSystem fs = FileSystem.get(conf);
+
try {
- int recno = Integer.parseInt(argv[0]);
- String segment = argv[1];
+ int recno = Integer.parseInt(remainingArgs[0]);
+ String segment = remainingArgs[1];
Path file = new Path(segment, DIR_NAME);
System.out.println("Reading from file: " + file);
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java Fri Oct 9 13:54:27 2009
@@ -19,8 +19,10 @@
import java.io.*;
import org.apache.hadoop.io.*;
+import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.conf.*;
+import org.apache.commons.cli.Options;
import org.apache.nutch.util.NutchConfiguration;
/* The text conversion of page's content, stored using gzip compression.
@@ -86,12 +88,18 @@
System.out.println("usage:" + usage);
return;
}
-
+ Options opts = new Options();
Configuration conf = NutchConfiguration.create();
- FileSystem fs = FileSystem.parseArgs(argv, 0, conf);
+
+ GenericOptionsParser parser =
+ new GenericOptionsParser(conf, opts, argv);
+
+ String[] remainingArgs = parser.getRemainingArgs();
+
+ FileSystem fs = FileSystem.get(conf);
try {
- int recno = Integer.parseInt(argv[0]);
- String segment = argv[1];
+ int recno = Integer.parseInt(remainingArgs[0]);
+ String segment = remainingArgs[1];
String filename = new Path(segment, ParseText.DIR_NAME).toString();
ParseText parseText = new ParseText();
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java Fri Oct 9 13:54:27 2009
@@ -27,6 +27,7 @@
import java.util.zip.InflaterInputStream;
//Hadoop imports
+import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -35,6 +36,7 @@
import org.apache.hadoop.io.UTF8;
import org.apache.hadoop.io.VersionMismatchException;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.GenericOptionsParser;
//Nutch imports
import org.apache.nutch.metadata.Metadata;
@@ -255,11 +257,18 @@
System.out.println("usage:" + usage);
return;
}
+ Options opts = new Options();
Configuration conf = NutchConfiguration.create();
- FileSystem fs = FileSystem.parseArgs(argv, 0, conf);
+
+ GenericOptionsParser parser =
+ new GenericOptionsParser(conf, opts, argv);
+
+ String[] remainingArgs = parser.getRemainingArgs();
+ FileSystem fs = FileSystem.get(conf);
+
try {
- int recno = Integer.parseInt(argv[0]);
- String segment = argv[1];
+ int recno = Integer.parseInt(remainingArgs[0]);
+ String segment = remainingArgs[1];
Path file = new Path(segment, DIR_NAME);
System.out.println("Reading from file: " + file);
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java?rev=823553&r1=823552&r2=823553&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java Fri Oct 9 13:54:27 2009
@@ -58,7 +58,10 @@
public void testSkipChildren() {
DOMParser parser= new DOMParser();
+
try {
+ parser.setFeature("http://xml.org/sax/features/validation", false);
+ parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
} catch (Exception e) {
e.printStackTrace();