You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by gr...@apache.org on 2005/02/25 02:04:30 UTC
svn commit: r155284 - lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
Author: gregor
Date: Thu Feb 24 17:04:28 2005
New Revision: 155284
URL: http://svn.apache.org/viewcvs?view=rev&rev=155284
Log:
Fixed a NullPointerException (NPE) reported by Phillipp Bolle.
Modified:
lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
Modified: lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java?view=diff&r1=155283&r2=155284
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java (original)
+++ lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java Thu Feb 24 17:04:28 2005
@@ -15,7 +15,7 @@
*
*/
-/* $Id: IterativeHTMLCrawler.java,v 1.28 2004/07/10 23:18:23 andreas Exp $ */
+/* $Id$ */
package org.apache.lenya.search.crawler;
@@ -63,11 +63,6 @@
new IterativeHTMLCrawler(new File(args[0])).crawl(new URL(ce.getBaseURL()), ce.getScopeURL());
} else {
System.err.println("Usage: IterativeHTMLCrawler crawler.xconf");
-/*
- new IterativeHTMLCrawler(ce.resolvePath(ce.getURIList()),
- ce.resolvePath(ce.getHTDocsDumpDir()), ce.getUserAgent()).crawl(new URL(
- ce.getBaseURL()), ce.getScopeURL());
-*/
}
} catch (MalformedURLException e) {
log.error("" + e);
@@ -90,15 +85,14 @@
/**
* Creates a new IterativeHTMLCrawler object.
- *
* @param config Configuration File
*/
public IterativeHTMLCrawler(File config) {
CrawlerConfiguration ce = new CrawlerConfiguration(config.getAbsolutePath());
- this.url_list_file = ce.resolvePath(ce.getURIList());
- this.html_dump_directory = ce.resolvePath(ce.getHTDocsDumpDir());
+ this.url_list_file = ce.getURIList();
+ this.html_dump_directory = ce.getHTDocsDumpDir();
robot = new RobotExclusion(ce.getUserAgent());
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org