You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2006/02/04 01:39:32 UTC
svn commit: r374796 [4/5] - in /lucene/nutch/trunk: bin/ conf/ lib/
lib/jetty-ext/ src/java/org/apache/nutch/analysis/
src/java/org/apache/nutch/clustering/ src/java/org/apache/nutch/crawl/
src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/f...
Modified: lucene/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java Fri Feb 3 16:38:32 2006
@@ -26,9 +26,9 @@
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.OutlinkExtractor;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.util.CommandRunner;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.PluginRepository;
@@ -58,7 +58,7 @@
// handy map from String contentType to String[] {command, timeoutString}
Hashtable TYPE_PARAMS_MAP = new Hashtable();
- private NutchConf nutchConf;
+ private Configuration conf;
private boolean loaded = false;
@@ -135,13 +135,13 @@
metaData.putAll(content.getMetadata()); // copy through
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
return new ParseImpl(text, parseData);
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
- Extension[] extensions = conf.getPluginRepository().getExtensionPoint(
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
"org.apache.nutch.parse.Parser").getExtensions();
String contentType, command, timeoutString;
@@ -169,7 +169,7 @@
}
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java Fri Feb 3 16:38:32 2006
@@ -24,9 +24,10 @@
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;
@@ -80,7 +81,7 @@
fos.close();
// get nutch content
- Protocol protocol = new ProtocolFactory(new NutchConf()).getProtocol(urlString);
+ Protocol protocol = new ProtocolFactory(NutchConfiguration.create()).getProtocol(urlString);
content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
protocol = null;
}
@@ -104,19 +105,19 @@
return;
}
- NutchConf nutchConf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
// loop alternately, total 10*2 times of invoking external command
for (int i=0; i<10; i++) {
// check external parser that does 'cat'
contentType = "application/vnd.nutch.example.cat";
content.setContentType(contentType);
- parse = new ParseUtil(nutchConf).parseByParserId("parse-ext", content);
+ parse = new ParseUtil(conf).parseByParserId("parse-ext", content);
assertEquals(expectedText,parse.getText());
// check external parser that does 'md5sum'
contentType = "application/vnd.nutch.example.md5sum";
content.setContentType(contentType);
- parse = new ParseUtil(nutchConf).parseByParserId("parse-ext", content);
+ parse = new ParseUtil(conf).parseByParserId("parse-ext", content);
assertTrue(parse.getText().startsWith(expectedMD5sum));
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java Fri Feb 3 16:38:32 2006
@@ -22,7 +22,7 @@
import java.util.HashMap;
import org.apache.nutch.parse.Outlink;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import org.w3c.dom.*;
@@ -287,7 +287,7 @@
* nekohtml).
*/
public static final void getOutlinks(URL base, ArrayList outlinks,
- Node node, NutchConf nutchConf) {
+ Node node, Configuration conf) {
NodeList children = node.getChildNodes();
int childLen= 0;
@@ -323,7 +323,7 @@
try {
URL url = new URL(base, target);
outlinks.add(new Outlink(url.toString(),
- linkText.toString().trim(), nutchConf));
+ linkText.toString().trim(), conf));
} catch (MalformedURLException e) {
// don't care
}
@@ -333,7 +333,7 @@
}
}
for ( int i = 0; i < childLen; i++ ) {
- getOutlinks(base, outlinks, children.item(i), nutchConf);
+ getOutlinks(base, outlinks, children.item(i), conf);
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java Fri Feb 3 16:38:32 2006
@@ -32,9 +32,10 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.parse.*;
-
+import org.apache.nutch.util.*;
public class HtmlParser implements Parser {
public static final Logger LOG =
@@ -93,7 +94,7 @@
private String defaultCharEncoding;
- private NutchConf nutchConf;
+ private Configuration conf;
private HtmlParseFilters htmlParseFilters;
@@ -200,7 +201,7 @@
status.setMessage(metaTags.getRefreshHref().toString());
}
ParseData parseData = new ParseData(status, title, outlinks, metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
Parse parse = new ParseImpl(text, parseData);
// run filters on parse
@@ -271,22 +272,22 @@
in.readFully(bytes);
Parse parse = new HtmlParser().getParse(new Content(url,url,
bytes,"text/html",
- new ContentProperties(), new NutchConf()));
+ new ContentProperties(), NutchConfiguration.create()));
System.out.println("data: "+parse.getData());
System.out.println("text: "+parse.getText());
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
this.htmlParseFilters = new HtmlParseFilters(getConf());
this.parserImpl = getConf().get("parser.html.impl", "neko");
this.defaultCharEncoding = getConf().get(
"parser.character.encoding.default", "windows-1252");
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java Fri Feb 3 16:38:32 2006
@@ -19,7 +19,8 @@
import junit.framework.TestCase;
import org.apache.nutch.parse.Outlink;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
import java.io.ByteArrayInputStream;
import java.net.MalformedURLException;
@@ -175,7 +176,7 @@
}
private static void setup() {
- NutchConf nutchConf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
DOMFragmentParser parser= new DOMFragmentParser();
for (int i= 0; i < testPages.length; i++) {
DocumentFragment node=
@@ -194,36 +195,36 @@
try {
answerOutlinks = new Outlink[][]{
{
- new Outlink("http://www.nutch.org", "anchor", nutchConf),
+ new Outlink("http://www.nutch.org", "anchor", conf),
},
{
- new Outlink("http://www.nutch.org/", "home", nutchConf),
- new Outlink("http://www.nutch.org/docs/bot.html", "bots", nutchConf),
+ new Outlink("http://www.nutch.org/", "home", conf),
+ new Outlink("http://www.nutch.org/docs/bot.html", "bots", conf),
},
{
- new Outlink("http://www.nutch.org/", "separate this", nutchConf),
- new Outlink("http://www.nutch.org/docs/ok", "from this", nutchConf),
+ new Outlink("http://www.nutch.org/", "separate this", conf),
+ new Outlink("http://www.nutch.org/docs/ok", "from this", conf),
},
{
- new Outlink("http://www.nutch.org/", "home", nutchConf),
- new Outlink("http://www.nutch.org/docs/1", "1", nutchConf),
- new Outlink("http://www.nutch.org/docs/2", "2", nutchConf),
+ new Outlink("http://www.nutch.org/", "home", conf),
+ new Outlink("http://www.nutch.org/docs/1", "1", conf),
+ new Outlink("http://www.nutch.org/docs/2", "2", conf),
},
{
- new Outlink("http://www.nutch.org/frames/top.html", "", nutchConf),
- new Outlink("http://www.nutch.org/frames/left.html", "", nutchConf),
- new Outlink("http://www.nutch.org/frames/invalid.html", "", nutchConf),
- new Outlink("http://www.nutch.org/frames/right.html", "", nutchConf),
+ new Outlink("http://www.nutch.org/frames/top.html", "", conf),
+ new Outlink("http://www.nutch.org/frames/left.html", "", conf),
+ new Outlink("http://www.nutch.org/frames/invalid.html", "", conf),
+ new Outlink("http://www.nutch.org/frames/right.html", "", conf),
},
{
- new Outlink("http://www.nutch.org/maps/logo.gif", "", nutchConf),
- new Outlink("http://www.nutch.org/index.html", "", nutchConf),
- new Outlink("http://www.nutch.org/maps/#bottom", "", nutchConf),
- new Outlink("http://www.nutch.org/bot.html", "", nutchConf),
- new Outlink("http://www.nutch.org/docs/index.html", "", nutchConf),
+ new Outlink("http://www.nutch.org/maps/logo.gif", "", conf),
+ new Outlink("http://www.nutch.org/index.html", "", conf),
+ new Outlink("http://www.nutch.org/maps/#bottom", "", conf),
+ new Outlink("http://www.nutch.org/bot.html", "", conf),
+ new Outlink("http://www.nutch.org/docs/index.html", "", conf),
},
{
- new Outlink("http://www.nutch.org/index.html", "whitespace test", nutchConf),
+ new Outlink("http://www.nutch.org/index.html", "whitespace test", conf),
},
{
}
@@ -284,7 +285,7 @@
setup();
for (int i= 0; i < testPages.length; i++) {
ArrayList outlinks= new ArrayList();
- DOMContentUtils.getOutlinks(testBaseHrefURLs[i], outlinks, testDOMs[i], new NutchConf());
+ DOMContentUtils.getOutlinks(testBaseHrefURLs[i], outlinks, testDOMs[i], NutchConfiguration.create());
Outlink[] outlinkArr= new Outlink[outlinks.size()];
outlinkArr= (Outlink[]) outlinks.toArray(outlinkArr);
compareOutlinks(answerOutlinks[i], outlinkArr);
Modified: lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java Fri Feb 3 16:38:32 2006
@@ -23,8 +23,9 @@
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
@@ -52,7 +53,7 @@
private static final int MAX_TITLE_LEN = 80;
- private NutchConf nutchConf;
+ private Configuration conf;
public Parse filter(Content content, Parse parse, HTMLMetaTags metaTags, DocumentFragment doc) {
String url = content.getBaseUrl();
@@ -68,7 +69,7 @@
String text = parse.getText();
Outlink[] newlinks = (Outlink[])outlinks.toArray(new Outlink[outlinks.size()]);
ParseData parseData = new ParseData(status, title, newlinks, metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
parse = new ParseImpl(text, parseData);
}
return parse;
@@ -146,7 +147,7 @@
metadata.putAll(c.getMetadata());
ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, title,
outlinks, metadata);
- pd.setConf(this.nutchConf);
+ pd.setConf(this.conf);
Parse parse = new ParseImpl(script, pd);
return parse;
}
@@ -232,18 +233,18 @@
String line = null;
while ((line = br.readLine()) != null) sb.append(line + "\n");
JSParseFilter parseFilter = new JSParseFilter();
- parseFilter.setConf(new NutchConf());
+ parseFilter.setConf(NutchConfiguration.create());
Outlink[] links = parseFilter.getJSLinks(sb.toString(), args[1], args[1]);
System.out.println("Outlinks extracted: " + links.length);
for (int i = 0; i < links.length; i++)
System.out.println(" - " + links[i]);
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java Fri Feb 3 16:38:32 2006
@@ -19,7 +19,7 @@
import org.apache.nutch.parse.*;
import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import org.farng.mp3.MP3File;
import org.farng.mp3.TagException;
import org.farng.mp3.id3.AbstractID3v2;
@@ -41,7 +41,7 @@
public class MP3Parser implements Parser {
private MetadataCollector metadataCollector;
- private NutchConf nutchConf;
+ private Configuration conf;
public Parse getParse(Content content) throws ParseException {
Parse parse = null;
@@ -115,12 +115,12 @@
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
this.metadataCollector = new MetadataCollector(conf);
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java Fri Feb 3 16:38:32 2006
@@ -17,7 +17,7 @@
package org.apache.nutch.parse.mp3;
import org.apache.nutch.parse.Outlink;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import java.net.MalformedURLException;
import java.util.ArrayList;
@@ -35,10 +35,10 @@
private String album = null;
private ArrayList links = new ArrayList();
private String text = "";
- private NutchConf nutchConf;
+ private Configuration conf;
- public MetadataCollector(NutchConf nutchConf) {
- this.nutchConf = nutchConf;
+ public MetadataCollector(Configuration conf) {
+ this.conf = conf;
}
public void notifyProperty(String name, String value) throws MalformedURLException {
@@ -50,7 +50,7 @@
setArtist(value);
if (name.indexOf("URL Link") > -1) {
- links.add(new Outlink(value, "", this.nutchConf));
+ links.add(new Outlink(value, "", this.conf));
} else if (name.indexOf("Text") > -1) {
text += value + "\n";
}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java Fri Feb 3 16:38:32 2006
@@ -21,7 +21,7 @@
import java.util.Vector;
import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.poi.hdf.extractor.Utils;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
@@ -411,4 +411,4 @@
return slides;
}
-}
\ No newline at end of file
+}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java Fri Feb 3 16:38:32 2006
@@ -31,8 +31,9 @@
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
/**
* Nutch-Parser for parsing MS PowerPoint slides ( mime type:
@@ -52,7 +53,7 @@
private static final Logger LOG = LogFormatter
.getLogger(MSPowerPointParser.class.getName());
- private NutchConf nutchConf;
+ private Configuration conf;
/**
*
@@ -80,7 +81,7 @@
ContentProperties prop = new ContentProperties();
prop.setProperty("Content-Length", "" + raw.length);
- Content content = new Content(file, file, raw, MIME_TYPE, prop, new NutchConf());
+ Content content = new Content(file, file, raw, MIME_TYPE, prop, NutchConfiguration.create());
System.out.println(ppe.getParse(content).getText());
}
@@ -144,7 +145,7 @@
final ParseStatus status = new ParseStatus(ParseStatus.SUCCESS);
final ParseData parseData = new ParseData(status, title, outlinks, metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
LOG.finest("PowerPoint file parsed sucessful.");
return new ParseImpl(plainText, parseData);
@@ -166,11 +167,11 @@
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java Fri Feb 3 16:38:32 2006
@@ -22,7 +22,7 @@
import java.util.Properties;
import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
@@ -150,4 +150,4 @@
notifyAll();
}
}
-}
\ No newline at end of file
+}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java Fri Feb 3 16:38:32 2006
@@ -22,7 +22,7 @@
import java.util.logging.Logger;
import org.apache.nutch.parse.mspowerpoint.PPTExtractor.PropertiesBroker;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
@@ -126,4 +126,4 @@
this.properties.setProperty(name, this.dateFormatter.format(value));
}
}
-}
\ No newline at end of file
+}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java Fri Feb 3 16:38:32 2006
@@ -33,10 +33,11 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
/**
@@ -106,7 +107,7 @@
this.urlString = createUrl(this.testFile.getName());
System.out.println("Testing file: " + this.urlString + "...");
- this.protocol =new ProtocolFactory(new NutchConf()).getProtocol(this.urlString);
+ this.protocol =new ProtocolFactory(NutchConfiguration.create()).getProtocol(this.urlString);
this.content = this.protocol.getProtocolOutput(new UTF8(this.urlString), new CrawlDatum()).getContent();
}
@@ -126,7 +127,7 @@
*/
public void testContent() throws Exception {
- Parse parse = new ParseUtil(new NutchConf()).parseByParserId("parse-mspowerpoint",this.content);
+ Parse parse = new ParseUtil(NutchConfiguration.create()).parseByParserId("parse-mspowerpoint",this.content);
ParseData data = parse.getData();
String text = parse.getText();
@@ -163,7 +164,7 @@
*/
public void testMeta() throws Exception {
- Parse parse = new ParseUtil(new NutchConf()).parseByParserId("parse-mspowerpoint",content);
+ Parse parse = new ParseUtil(NutchConfiguration.create()).parseByParserId("parse-mspowerpoint",content);
ParseData data = parse.getData();
Modified: lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java Fri Feb 3 16:38:32 2006
@@ -18,8 +18,8 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.Parse;
@@ -51,7 +51,7 @@
*/
public class MSWordParser implements Parser {
- private NutchConf nutchConf;
+ private Configuration conf;
// public static final Logger LOG =
// LogFormatter.getLogger("org.apache.nutch.parse.msword");
@@ -73,7 +73,7 @@
&& raw.length != Integer.parseInt(contentLength)) {
return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED,
"Content truncated at " + raw.length
- +" bytes. Parser can't handle incomplete msword file.").getEmptyParse(this.nutchConf);
+ +" bytes. Parser can't handle incomplete msword file.").getEmptyParse(this.conf);
}
WordExtractor extractor = new WordExtractor();
@@ -87,14 +87,14 @@
extractor = null;
} catch (ParseException e) {
- return new ParseStatus(e).getEmptyParse(this.nutchConf);
+ return new ParseStatus(e).getEmptyParse(this.conf);
} catch (FastSavedException e) {
- return new ParseStatus(e).getEmptyParse(this.nutchConf);
+ return new ParseStatus(e).getEmptyParse(this.conf);
} catch (PasswordProtectedException e) {
- return new ParseStatus(e).getEmptyParse(this.nutchConf);
+ return new ParseStatus(e).getEmptyParse(this.conf);
} catch (Exception e) { // run time exception
return new ParseStatus(ParseStatus.FAILED,
- "Can't be handled as msword document. " + e).getEmptyParse(this.nutchConf);
+ "Can't be handled as msword document. " + e).getEmptyParse(this.conf);
} finally {
// nothing so far
}
@@ -116,21 +116,21 @@
title = "";
// collect outlink
- Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, this.nutchConf);
+ Outlink[] outlinks = OutlinkExtractor.getOutlinks(text, this.conf);
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
return new ParseImpl(text, parseData);
// any filter?
//return HtmlParseFilters.filter(content, parse, root);
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java Fri Feb 3 16:38:32 2006
@@ -24,9 +24,10 @@
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;
@@ -62,13 +63,13 @@
Content content;
Parse parse;
- NutchConf nutchConf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
for (int i=0; i<sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
- protocol = new ProtocolFactory(nutchConf).getProtocol(urlString);
+ protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
- parse = new ParseUtil(nutchConf).parseByParserId("parse-msword",content);
+ parse = new ParseUtil(conf).parseByParserId("parse-msword",content);
assertTrue(parse.getText().startsWith(expectedText));
}
Modified: lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java Fri Feb 3 16:38:32 2006
@@ -27,8 +27,8 @@
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.Parse;
@@ -63,7 +63,7 @@
public class PdfParser implements Parser {
public static final Logger LOG =
LogFormatter.getLogger("org.apache.nutch.parse.pdf");
- private NutchConf nutchConf;
+ private Configuration conf;
public PdfParser () {
// redirect org.apache.log4j.Logger to java's native logger, in order
@@ -78,7 +78,7 @@
org.apache.log4j.Appender appender = new org.apache.log4j.WriterAppender(
new org.apache.log4j.SimpleLayout(),
- org.apache.nutch.util.LogFormatter.getLogStream(
+ org.apache.hadoop.util.LogFormatter.getLogStream(
this.LOG, java.util.logging.Level.INFO));
rootLogger.addAppender(appender);
@@ -166,7 +166,7 @@
metadata.putAll(content.getMetadata()); // copy through
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
return new ParseImpl(text, parseData);
// any filter?
//return HtmlParseFilters.filter(content, parse, root);
@@ -183,12 +183,12 @@
return retval;
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java Fri Feb 3 16:38:32 2006
@@ -24,9 +24,10 @@
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;
@@ -65,10 +66,10 @@
for (int i=0; i<sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
- NutchConf nutchConf = new NutchConf();
- protocol = new ProtocolFactory(nutchConf).getProtocol(urlString);
+ Configuration conf = NutchConfiguration.create();
+ protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
- parse = new ParseUtil(nutchConf).parseByParserId("parse-pdf",content);
+ parse = new ParseUtil(conf).parseByParserId("parse-pdf",content);
int index = parse.getText().indexOf(expectedText);
assertTrue(index > 0);
Modified: lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java Fri Feb 3 16:38:32 2006
@@ -17,8 +17,8 @@
package org.apache.nutch.parse.rss;
import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseStatus;
@@ -64,7 +64,7 @@
public class RSSParser implements Parser {
public static final Logger LOG = LogFormatter
.getLogger("org.apache.nutch.parse.rss");
- private NutchConf nutchConf;
+ private Configuration conf;
/**
* <p>
@@ -85,7 +85,7 @@
org.apache.log4j.Appender appender = new org.apache.log4j.WriterAppender(
new org.apache.log4j.SimpleLayout(),
- org.apache.nutch.util.LogFormatter.getLogStream(this.LOG,
+ org.apache.hadoop.util.LogFormatter.getLogStream(this.LOG,
java.util.logging.Level.INFO));
rootLogger.addAppender(appender);
@@ -213,16 +213,16 @@
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
contentTitle.toString(), outlinks, content.getMetadata());
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
return new ParseImpl(indexText.toString(), parseData);
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java Fri Feb 3 16:38:32 2006
@@ -26,9 +26,10 @@
import org.apache.nutch.parse.ParseException;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.Outlink;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;
@@ -80,13 +81,13 @@
Content content;
Parse parse;
- NutchConf nutchConf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
for (int i = 0; i < sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
- protocol = new ProtocolFactory(nutchConf).getProtocol(urlString);
+ protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
- parse = new ParseUtil(nutchConf).parseByParserId("parse-rss",content);
+ parse = new ParseUtil(conf).parseByParserId("parse-rss",content);
//check that there are 3 outlinks:
//http://test.channel.com
Modified: lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java Fri Feb 3 16:38:32 2006
@@ -18,7 +18,7 @@
import org.apache.nutch.parse.*;
import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import java.io.ByteArrayInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
@@ -33,7 +33,7 @@
*/
public class RTFParseFactory implements Parser {
- private NutchConf nutchConf;
+ private Configuration conf;
public Parse getParse(Content content) throws ParseException {
byte[] raw = content.getContent();
@@ -64,14 +64,14 @@
String text = delegate.getText();
return new ParseImpl(text, new ParseData(title, OutlinkExtractor
- .getOutlinks(text, this.nutchConf), metadata));
+ .getOutlinks(text, this.conf), metadata));
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java Fri Feb 3 16:38:32 2006
@@ -25,7 +25,7 @@
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.ProtocolFactory;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import java.util.Properties;
@@ -61,12 +61,12 @@
Content content;
Parse parse;
- NutchConf nutchConf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
urlString = "file:" + sampleDir + fileSeparator + rtfFile;
- protocol = new ProtocolFactory(nutchConf).getProtocol(urlString);
+ protocol = new ProtocolFactory(conf).getProtocol(urlString);
content = protocol.getContent(urlString);
- parse = new ParseUtil(nutchConf).parseByParserId("parse-rtf", content);
+ parse = new ParseUtil(conf).parseByParserId("parse-rtf", content);
String text = parse.getText();
assertEquals("The quick brown fox jumps over the lazy dog", text.trim());
Modified: lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java Fri Feb 3 16:38:32 2006
@@ -24,8 +24,10 @@
import org.apache.nutch.parse.*;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
import com.anotherbigidea.flash.interfaces.*;
import com.anotherbigidea.flash.readers.*;
@@ -43,16 +45,16 @@
public class SWFParser implements Parser {
public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.parse.swf");
- private NutchConf nutchConf = null;
+ private Configuration conf = null;
public SWFParser() {}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return nutchConf;
+ public Configuration getConf() {
+ return conf;
}
public Parse getParse(Content content) {
@@ -70,7 +72,7 @@
String contentLength = content.get("Content-Length");
if (contentLength != null && raw.length != Integer.parseInt(contentLength)) {
return new ParseStatus(ParseStatus.FAILED, ParseStatus.FAILED_TRUNCATED, "Content truncated at " + raw.length
- + " bytes. Parser can't handle incomplete files.").getEmptyParse(nutchConf);
+ + " bytes. Parser can't handle incomplete files.").getEmptyParse(conf);
}
ExtractText extractor = new ExtractText();
@@ -90,16 +92,16 @@
// harvest potential outlinks
String[] links = extractor.getUrls();
for (int i = 0; i < links.length; i++) {
- Outlink out = new Outlink(links[i], "", nutchConf);
+ Outlink out = new Outlink(links[i], "", conf);
outlinks.add(out);
}
- Outlink[] olinks = OutlinkExtractor.getOutlinks(text, nutchConf);
+ Outlink[] olinks = OutlinkExtractor.getOutlinks(text, conf);
if (olinks != null) for (int i = 0; i < olinks.length; i++) {
outlinks.add(olinks[i]);
}
} catch (Exception e) { // run time exception
e.printStackTrace();
- return new ParseStatus(ParseStatus.FAILED, "Can't be handled as SWF document. " + e).getEmptyParse(nutchConf);
+ return new ParseStatus(ParseStatus.FAILED, "Can't be handled as SWF document. " + e).getEmptyParse(conf);
} finally {}
if (text == null) text = "";
@@ -118,7 +120,7 @@
in.read(buf);
SWFParser parser = new SWFParser();
Parse p = parser.getParse(new Content("file:" + args[0], "file:" + args[0], buf, "application/x-shockwave-flash",
- new ContentProperties(), new NutchConf()));
+ new ContentProperties(), NutchConfiguration.create()));
System.out.println("Parse Text:");
System.out.println(p.getText());
System.out.println("Parse Data:");
@@ -696,4 +698,4 @@
return null;
else return super.pop();
}
-}
\ No newline at end of file
+}
Modified: lucene/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java Fri Feb 3 16:38:32 2006
@@ -21,7 +21,7 @@
import java.io.InputStreamReader;
import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.Content;
@@ -32,7 +32,8 @@
import org.apache.nutch.parse.Parser;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
import junit.framework.TestCase;
@@ -81,7 +82,7 @@
Protocol protocol;
Content content;
Parse parse;
- NutchConf conf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
for (int i = 0; i < sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
Modified: lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java Fri Feb 3 16:38:32 2006
@@ -23,8 +23,10 @@
import org.apache.nutch.parse.*;
import org.apache.nutch.util.*;
+import org.apache.hadoop.conf.Configuration;
+
public class TextParser implements Parser {
- private NutchConf nutchConf;
+ private Configuration conf;
public Parse getParse(Content content) {
// copy content meta data through
@@ -50,16 +52,16 @@
}
ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "",
OutlinkExtractor.getOutlinks(text, getConf()), metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
return new ParseImpl(text, parseData);
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java Fri Feb 3 16:38:32 2006
@@ -32,8 +32,8 @@
import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
/**
* ZipParser class based on MSPowerPointParser class by Stephan Strittmatter.
@@ -45,7 +45,7 @@
private static final Logger LOG = LogFormatter.getLogger(ZipParser.class
.getName());
- private NutchConf nutchConf;
+ private Configuration conf;
/** Creates a new instance of ZipParser */
public ZipParser() {
@@ -102,18 +102,18 @@
outlinks = (Outlink[]) outLinksList.toArray(new Outlink[0]);
final ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS,
resultTitle, outlinks, metadata);
- parseData.setConf(this.nutchConf);
+ parseData.setConf(this.conf);
LOG.finest("Zip file parsed sucessfully !!");
return new ParseImpl(resultText, parseData);
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java Fri Feb 3 16:38:32 2006
@@ -34,8 +34,8 @@
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.mime.MimeTypes;
@@ -50,13 +50,13 @@
public static final Logger LOG = LogFormatter.getLogger(ZipTextExtractor.class.getName());
-private NutchConf nutchConf;
+private Configuration conf;
/** Creates a new instance of ZipTextExtractor */
- public ZipTextExtractor(NutchConf nutchConf) {
- this.nutchConf = nutchConf;
- this.MIME = MimeTypes.get(nutchConf.get("mime.types.file"));
+ public ZipTextExtractor(Configuration conf) {
+ this.conf = conf;
+ this.MIME = MimeTypes.get(conf.get("mime.types.file"));
}
public String extractText(InputStream input, String url, List outLinksList) throws IOException {
@@ -91,13 +91,13 @@
ContentProperties metadata = new ContentProperties();
metadata.setProperty("Content-Length", Long.toString(entry.getSize()));
metadata.setProperty("Content-Type", contentType);
- Content content = new Content(newurl, base, b, contentType, metadata, this.nutchConf);
- Parse parse = new ParseUtil(this.nutchConf).parse(content);
+ Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
+ Parse parse = new ParseUtil(this.conf).parse(content);
ParseData theParseData = parse.getData();
Outlink[] theOutlinks = theParseData.getOutlinks();
for(int count = 0; count < theOutlinks.length; count++) {
- outLinksList.add(new Outlink(theOutlinks[count].getToUrl(), theOutlinks[count].getAnchor(), this.nutchConf));
+ outLinksList.add(new Outlink(theOutlinks[count].getToUrl(), theOutlinks[count].getAnchor(), this.conf));
}
resultText += entry.getName() + " " + parse.getText() + " ";
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java Fri Feb 3 16:38:32 2006
@@ -24,9 +24,10 @@
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.crawl.CrawlDatum;
import junit.framework.TestCase;
@@ -62,7 +63,7 @@
Content content;
Parse parse;
- NutchConf conf = new NutchConf();
+ Configuration conf = NutchConfiguration.create();
for (int i = 0; i < sampleFiles.length; i++) {
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java Fri Feb 3 16:38:32 2006
@@ -18,11 +18,11 @@
import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.net.protocols.HttpDateFormat;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
@@ -59,7 +59,7 @@
// http date format
HttpDateFormat httpDateFormat = null;
- private NutchConf nutchConf;
+ private Configuration conf;
// constructor
public File() {
@@ -154,12 +154,12 @@
file = null;
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
this.maxContentLength = conf.getInt("file.content.limit", 64 * 1024);
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java Fri Feb 3 16:38:32 2006
@@ -26,7 +26,7 @@
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
/************************************
@@ -64,7 +64,7 @@
private ContentProperties headers = new ContentProperties();
private final File file;
- private NutchConf nutchConf;
+ private Configuration conf;
/** Returns the response code. */
public int getCode() { return code; }
@@ -79,16 +79,16 @@
public Content toContent() {
return new Content(orig, base, content,
getHeader("Content-Type"),
- headers, this.nutchConf);
+ headers, this.conf);
}
- public FileResponse(URL url, CrawlDatum datum, File file, NutchConf nutchConf)
+ public FileResponse(URL url, CrawlDatum datum, File file, Configuration conf)
throws FileException, IOException {
this.orig = url.toString();
this.base = url.toString();
this.file = file;
- this.nutchConf = nutchConf;
+ this.conf = conf;
if (!"file".equals(url.getProtocol()))
throw new FileException("Not a file url:" + url);
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Fri Feb 3 16:38:32 2006
@@ -20,11 +20,11 @@
import org.apache.commons.net.ftp.FTPFileEntryParser;
import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
import org.apache.nutch.net.protocols.HttpDateFormat;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
@@ -86,7 +86,7 @@
// http date format
HttpDateFormat httpDateFormat = null;
- private NutchConf nutchConf;
+ private Configuration conf;
// constructor
@@ -223,8 +223,8 @@
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
this.maxContentLength = conf.getInt("ftp.content.limit", 64 * 1024);
this.timeout = conf.getInt("ftp.timeout", 10000);
this.userName = conf.get("ftp.username", "anonymous");
@@ -234,8 +234,8 @@
this.followTalk = conf.getBoolean("ftp.follow.talk", false);
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Fri Feb 3 16:38:32 2006
@@ -27,7 +27,7 @@
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import java.net.InetAddress;
import java.net.URL;
@@ -65,7 +65,7 @@
private ContentProperties headers = new ContentProperties();
private final Ftp ftp;
- private NutchConf nutchConf;
+ private Configuration conf;
/** Returns the response code. */
public int getCode() { return code; }
@@ -80,16 +80,16 @@
public Content toContent() {
return new Content(orig, base, content,
getHeader("Content-Type"),
- headers, this.nutchConf);
+ headers, this.conf);
}
- public FtpResponse(URL url, CrawlDatum datum, Ftp ftp, NutchConf nutchConf)
+ public FtpResponse(URL url, CrawlDatum datum, Ftp ftp, Configuration conf)
throws FtpException, IOException {
this.orig = url.toString();
this.base = url.toString();
this.ftp = ftp;
- this.nutchConf = nutchConf;
+ this.conf = conf;
if (!"ftp".equals(url.getProtocol()))
throw new FtpException("Not a ftp url:" + url);
Modified: lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java Fri Feb 3 16:38:32 2006
@@ -26,8 +26,9 @@
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.http.api.HttpBase;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
public class Http extends HttpBase {
@@ -40,7 +41,7 @@
super(LOG);
}
- public void setConf(NutchConf conf) {
+ public void setConf(Configuration conf) {
super.setConf(conf);
Level logLevel = Level.WARNING;
if (conf.getBoolean("http.verbose", false)) {
@@ -51,7 +52,7 @@
public static void main(String[] args) throws Exception {
Http http = new Http();
- http.setConf(new NutchConf());
+ http.setConf(NutchConfiguration.create());
main(http, args);
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Fri Feb 3 16:38:32 2006
@@ -38,8 +38,9 @@
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.protocol.ProtocolException;
import org.apache.nutch.protocol.http.api.HttpBase;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
public class Http extends HttpBase {
@@ -49,7 +50,7 @@
private static MultiThreadedHttpConnectionManager connectionManager =
new MultiThreadedHttpConnectionManager();
- // Since the NutchConf has not yet been setted,
+ // Since the Configuration has not yet been setted,
// then an unconfigured client is returned.
private static HttpClient client = new HttpClient(connectionManager);
@@ -68,7 +69,7 @@
super(LOG);
}
- public void setConf(NutchConf conf) {
+ public void setConf(Configuration conf) {
super.setConf(conf);
this.maxThreadsTotal = conf.getInt("fetcher.threads.fetch", 10);
this.ntlmUsername = conf.get("http.auth.ntlm.username", "");
@@ -87,7 +88,7 @@
public static void main(String[] args) throws Exception {
Http http = new Http();
- http.setConf(new NutchConf());
+ http.setConf(NutchConfiguration.create());
main(http, args);
}
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java Fri Feb 3 16:38:32 2006
@@ -13,9 +13,9 @@
import java.util.logging.Logger;
import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.NutchConfigurable;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configurable;
/**
@@ -30,7 +30,7 @@
*
* @author Matt Tencati
*/
-public class HttpAuthenticationFactory implements NutchConfigurable {
+public class HttpAuthenticationFactory implements Configurable {
/**
* The HTTP Authentication (WWW-Authenticate) header which is returned
@@ -43,19 +43,19 @@
private static Map auths = new TreeMap();
- private NutchConf conf = null;
+ private Configuration conf = null;
- public HttpAuthenticationFactory(NutchConf conf) {
+ public HttpAuthenticationFactory(Configuration conf) {
setConf(conf);
}
/* ---------------------------------- *
- * <implementation:NutchConfigurable> *
+ * <implementation:Configurable> *
* ---------------------------------- */
- public void setConf(NutchConf conf) {
+ public void setConf(Configuration conf) {
this.conf = conf;
if (conf.getBoolean("http.auth.verbose", false)) {
LOG.setLevel(Level.FINE);
@@ -64,12 +64,12 @@
}
}
- public NutchConf getConf() {
+ public Configuration getConf() {
return conf;
}
/* ---------------------------------- *
- * <implementation:NutchConfigurable> *
+ * <implementation:Configurable> *
* ---------------------------------- */
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java Fri Feb 3 16:38:32 2006
@@ -14,9 +14,9 @@
import org.apache.commons.codec.binary.Base64;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.NutchConfigurable;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configurable;
/**
* Implementation of RFC 2617 Basic Authentication. Usernames and passwords are stored
@@ -26,7 +26,7 @@
*
* @author Matt Tencati
*/
-public class HttpBasicAuthentication implements HttpAuthentication, NutchConfigurable {
+public class HttpBasicAuthentication implements HttpAuthentication, Configurable {
public static final Logger LOG =
LogFormatter.getLogger(HttpBasicAuthentication.class.getName());
@@ -35,7 +35,7 @@
private static Map authMap = new TreeMap();
- private NutchConf conf = null;
+ private Configuration conf = null;
private String challenge = null;
private ArrayList credentials = null;
private String realm = null;
@@ -49,9 +49,9 @@
*
* @param challenge WWW-Authenticate header from web server
*/
- protected HttpBasicAuthentication(String challenge, NutchConf nutchConf) throws HttpAuthenticationException {
+ protected HttpBasicAuthentication(String challenge, Configuration conf) throws HttpAuthenticationException {
- setConf(nutchConf);
+ setConf(conf);
this.challenge = challenge;
LOG.fine("BasicAuthentication challenge is " + challenge);
credentials = new ArrayList();
@@ -76,10 +76,10 @@
/* ---------------------------------- *
- * <implementation:NutchConfigurable> *
+ * <implementation:Configurable> *
* ---------------------------------- */
- public void setConf(NutchConf conf) {
+ public void setConf(Configuration conf) {
this.conf = conf;
if (conf.getBoolean("http.auth.verbose", false)) {
LOG.setLevel(Level.FINE);
@@ -88,12 +88,12 @@
}
}
- public NutchConf getConf() {
+ public Configuration getConf() {
return this.conf;
}
/* ---------------------------------- *
- * <implementation:NutchConfigurable> *
+ * <implementation:Configurable> *
* ---------------------------------- */
@@ -129,7 +129,7 @@
* @return An HttpBasicAuthentication object or null
* if unable to generate appropriate credentials.
*/
- public static HttpBasicAuthentication getAuthentication(String challenge, NutchConf conf) {
+ public static HttpBasicAuthentication getAuthentication(String challenge, Configuration conf) {
if (challenge == null) return null;
Matcher basicMatcher = basic.matcher(challenge);
if (basicMatcher.matches()) {
Modified: lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java Fri Feb 3 16:38:32 2006
@@ -26,7 +26,7 @@
import org.apache.nutch.searcher.QueryFilter;
import org.apache.nutch.searcher.Query;
import org.apache.nutch.searcher.Query.*;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.util.HashSet;
@@ -71,7 +71,7 @@
* sloppy phrase match. */
public void setSlop(int slop) { SLOP = slop; }
- private NutchConf nutchConf;
+ private Configuration conf;
public BooleanQuery filter(Query input, BooleanQuery output) {
addTerms(input, output);
@@ -168,8 +168,8 @@
return new org.apache.lucene.index.Term(field, term.toString());
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
this.URL_BOOST = conf.getFloat("query.url.boost", 4.0f);
this.ANCHOR_BOOST = conf.getFloat("query.anchor.boost", 2.0f);
this.TITLE_BOOST = conf.getFloat("query.title.boost", 1.5f);
@@ -177,7 +177,7 @@
this.PHRASE_BOOST = conf.getFloat("query.phrase.boost", 1.0f);
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java Fri Feb 3 16:38:32 2006
@@ -21,8 +21,8 @@
import org.apache.nutch.searcher.QueryFilter;
import org.apache.nutch.searcher.QueryException;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.RangeQuery;
@@ -49,7 +49,7 @@
// query syntax is defined as date:yyyymmdd-yyyymmdd
private static final Pattern pattern = Pattern.compile("^(\\d{8})-(\\d{8})$");
- private NutchConf nutchConf;
+ private Configuration conf;
public BooleanQuery filter(Query input, BooleanQuery output)
throws QueryException {
@@ -87,11 +87,11 @@
return output;
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/TypeQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/TypeQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/TypeQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/TypeQueryFilter.java Fri Feb 3 16:38:32 2006
@@ -17,7 +17,7 @@
package org.apache.nutch.searcher.more;
import org.apache.nutch.searcher.RawFieldQueryFilter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
/**
* Handles "type:" query clauses, causing them to search the field
@@ -27,17 +27,17 @@
*/
public class TypeQueryFilter extends RawFieldQueryFilter {
- private NutchConf nutchConf;
+ private Configuration conf;
public TypeQueryFilter() {
super("type");
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/query-site/src/java/org/apache/nutch/searcher/site/SiteQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-site/src/java/org/apache/nutch/searcher/site/SiteQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-site/src/java/org/apache/nutch/searcher/site/SiteQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-site/src/java/org/apache/nutch/searcher/site/SiteQueryFilter.java Fri Feb 3 16:38:32 2006
@@ -17,22 +17,22 @@
package org.apache.nutch.searcher.site;
import org.apache.nutch.searcher.RawFieldQueryFilter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
/** Handles "site:" query clauses, causing them to search the field indexed by
* SiteIndexingFilter. */
public class SiteQueryFilter extends RawFieldQueryFilter {
- private NutchConf nutchConf;
+ private Configuration conf;
public SiteQueryFilter() {
super("site");
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/query-url/src/java/org/apache/nutch/searcher/url/URLQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-url/src/java/org/apache/nutch/searcher/url/URLQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-url/src/java/org/apache/nutch/searcher/url/URLQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/query-url/src/java/org/apache/nutch/searcher/url/URLQueryFilter.java Fri Feb 3 16:38:32 2006
@@ -17,24 +17,24 @@
package org.apache.nutch.searcher.url;
import org.apache.nutch.searcher.FieldQueryFilter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
/**
* Handles "url:" query clauses, causing them to search the field indexed by
* BasicIndexingFilter.
*/
public class URLQueryFilter extends FieldQueryFilter {
- private NutchConf nutchConf;
+ private Configuration conf;
public URLQueryFilter() {
super("url");
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java Fri Feb 3 16:38:32 2006
@@ -18,8 +18,8 @@
package org.apache.nutch.net;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.util.PrefixStringMatcher;
import org.apache.nutch.util.TrieStringMatcher;
@@ -55,7 +55,7 @@
private TrieStringMatcher trie;
- private NutchConf nutchConf;
+ private Configuration conf;
public PrefixURLFilter() throws IOException {
@@ -114,11 +114,11 @@
}
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
String pluginName = "urlfilter-prefix";
- Extension[] extensions = conf.getPluginRepository().getExtensionPoint(
+ Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
URLFilter.class.getName()).getExtensions();
for (int i = 0; i < extensions.length; i++) {
Extension extension = extensions[i];
@@ -156,8 +156,8 @@
}
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java Fri Feb 3 16:38:32 2006
@@ -16,8 +16,8 @@
package org.apache.nutch.net;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.LogFormatter;
import org.apache.nutch.plugin.Extension;
import org.apache.nutch.plugin.PluginRepository;
@@ -64,7 +64,7 @@
private List rules;
- private NutchConf nutchConf;
+ private Configuration conf;
public RegexURLFilter() {
}
@@ -152,10 +152,10 @@
}
}
- public void setConf(NutchConf conf) {
- this.nutchConf = conf;
+ public void setConf(Configuration conf) {
+ this.conf = conf;
String pluginName = "urlfilter-regex";
- Extension[] extensions = conf.getPluginRepository().getExtensionPoint(
+ Extension[] extensions = PluginRepository.get(conf).getExtensionPoint(
URLFilter.class.getName()).getExtensions();
for (int i = 0; i < extensions.length; i++) {
Extension extension = extensions[i];
@@ -191,8 +191,8 @@
}
}
- public NutchConf getConf() {
- return this.nutchConf;
+ public Configuration getConf() {
+ return this.conf;
}
}
Modified: lucene/nutch/trunk/src/test/nutch-site.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/nutch-site.xml?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/nutch-site.xml (original)
+++ lucene/nutch/trunk/src/test/nutch-site.xml Fri Feb 3 16:38:32 2006
@@ -2,7 +2,7 @@
<!-- Configuration overrides used during unit tests. -->
-<nutch-conf>
+<configuration>
<property>
<name>plugin.includes</name>
@@ -10,4 +10,4 @@
<description>Enable all plugins during unit testing.</description>
</property>
-</nutch-conf>
+</configuration>
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/analysis/TestQueryParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/analysis/TestQueryParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/analysis/TestQueryParser.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/analysis/TestQueryParser.java Fri Feb 3 16:38:32 2006
@@ -17,7 +17,8 @@
package org.apache.nutch.analysis;
import org.apache.nutch.searcher.Query;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
import junit.framework.TestCase;
@@ -27,10 +28,10 @@
*/
public class TestQueryParser extends TestCase {
- private static NutchConf nutchConf = new NutchConf();
+ private static Configuration conf = NutchConfiguration.create();
public void assertQueryEquals(String query, String result) throws Exception {
try {
- Query q = NutchAnalysis.parseQuery(query, nutchConf);
+ Query q = NutchAnalysis.parseQuery(query, conf);
String s = q.toString();
if (!s.equals(result)) {
fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/net/TestBasicUrlNormalizer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/net/TestBasicUrlNormalizer.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/net/TestBasicUrlNormalizer.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/net/TestBasicUrlNormalizer.java Fri Feb 3 16:38:32 2006
@@ -16,7 +16,8 @@
package org.apache.nutch.net;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
import junit.framework.TestCase;
@@ -82,7 +83,7 @@
}
private void normalizeTest(String weird, String normal) throws Exception {
- assertEquals(normal, new UrlNormalizerFactory(new NutchConf()).getNormalizer().normalize(weird));
+ assertEquals(normal, new UrlNormalizerFactory(NutchConfiguration.create()).getNormalizer().normalize(weird));
}
public static void main(String[] args) throws Exception {