You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2005/04/27 17:49:16 UTC
svn commit: r164995 - in /lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler: CrawlerConfiguration.java IterativeHTMLCrawler.java
Author: michi
Date: Wed Apr 27 08:49:15 2005
New Revision: 164995
URL: http://svn.apache.org/viewcvs?rev=164995&view=rev
Log:
resolving of paths fixed
Modified:
lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java
lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
Modified: lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java?rev=164995&r1=164994&r2=164995&view=diff
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java (original)
+++ lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java Wed Apr 27 08:49:15 2005
@@ -178,9 +178,9 @@
}
/**
- * DOCUMENT ME!
+ * Get URI list path
*
- * @return DOCUMENT ME!
+ * @return URI list path
*/
public String getURIList() {
log.debug(".getURIList(): " + uri_list);
@@ -189,6 +189,17 @@
}
/**
+ * Get URI list path as absolute path
+ *
+ * @return URI list path
+ */
+ public String getURIListResolved() {
+ log.debug(".getURIList(): " + uri_list);
+
+ return resolvePath(uri_list);
+ }
+
+ /**
* Get htdocs-dump-dir/@src
*
* @return htdocs-dump-dir/@src
@@ -200,6 +211,16 @@
}
/**
+ * Get htdocs-dump-dir/@src as absolute path
+ *
+ * @return htdocs-dump-dir/@src
+ */
+ public String getHTDocsDumpDirResolved() {
+
+ return resolvePath(htdocs_dump_dir);
+ }
+
+ /**
* Get robots/@src
*
* @return robots/@src
@@ -208,6 +229,17 @@
log.debug(robots_file);
return robots_file;
+ }
+
+ /**
+ * Get robots/@src as absolute path
+ *
+ * @return robots/@src
+ */
+ public String getRobotsFileResolved() {
+ log.debug(robots_file);
+
+ return resolvePath(robots_file);
}
/**
Modified: lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java?rev=164995&r1=164994&r2=164995&view=diff
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java (original)
+++ lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java Wed Apr 27 08:49:15 2005
@@ -91,16 +91,20 @@
CrawlerConfiguration ce = new CrawlerConfiguration(config.getAbsolutePath());
- this.url_list_file = ce.getURIList();
- this.html_dump_directory = ce.getHTDocsDumpDir();
+ this.url_list_file = ce.getURIListResolved();
+ log.error("URI list file: " + this.url_list_file);
+
+ this.html_dump_directory = ce.getHTDocsDumpDirResolved();
+ log.error("HTDocs Dump Dir: " + this.html_dump_directory);
robot = new RobotExclusion(ce.getUserAgent());
- String robots_file = ce.getRobotsFile();
+ String robots_file = ce.getRobotsFileResolved();
+ log.error("Robots File: " + robots_file);
String robots_domain = ce.getRobotsDomain();
if (robots_file != null && robots_domain != null) {
log.debug(robots_file + " " + robots_domain);
- robot.addLocalEntries(robots_domain, new File(ce.resolvePath(robots_file)));
+ robot.addLocalEntries(robots_domain, new File(robots_file));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org