You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2005/04/27 17:49:16 UTC

svn commit: r164995 - in /lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler: CrawlerConfiguration.java IterativeHTMLCrawler.java

Author: michi
Date: Wed Apr 27 08:49:15 2005
New Revision: 164995

URL: http://svn.apache.org/viewcvs?rev=164995&view=rev
Log:
resolving of paths fixed

Modified:
    lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java
    lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java

Modified: lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java?rev=164995&r1=164994&r2=164995&view=diff
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java (original)
+++ lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/CrawlerConfiguration.java Wed Apr 27 08:49:15 2005
@@ -178,9 +178,9 @@
     }
 
     /**
-     * DOCUMENT ME!
+     * Get URI list path
      *
-     * @return DOCUMENT ME!
+     * @return URI list path
      */
     public String getURIList() {
         log.debug(".getURIList(): " + uri_list);
@@ -189,6 +189,17 @@
     }
 
     /**
+     * Get URI list path as absolute path
+     *
+     * @return URI list path resolved to an absolute path
+     */
+    public String getURIListResolved() {
+        log.debug(".getURIList(): " + uri_list);
+
+        return resolvePath(uri_list);
+    }
+
+    /**
      * Get htdocs-dump-dir/@src
      *
      * @return htdocs-dump-dir/@src
@@ -200,6 +211,16 @@
     }
 
     /**
+     * Get htdocs-dump-dir/@src as absolute path
+     *
+     * @return htdocs-dump-dir/@src resolved to an absolute path
+     */
+    public String getHTDocsDumpDirResolved() {
+
+        return resolvePath(htdocs_dump_dir);
+    }
+
+    /**
      * Get robots/@src
      *
      * @return robots/@src
@@ -208,6 +229,17 @@
         log.debug(robots_file);
 
         return robots_file;
+    }
+
+    /**
+     * Get robots/@src as absolute path
+     *
+     * @return robots/@src resolved to an absolute path
+     */
+    public String getRobotsFileResolved() {
+        log.debug(robots_file);
+
+        return resolvePath(robots_file);
     }
 
     /**

Modified: lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
URL: http://svn.apache.org/viewcvs/lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java?rev=164995&r1=164994&r2=164995&view=diff
==============================================================================
--- lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java (original)
+++ lenya/branches/BRANCH_1_2_X/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java Wed Apr 27 08:49:15 2005
@@ -91,16 +91,20 @@
         CrawlerConfiguration ce = new CrawlerConfiguration(config.getAbsolutePath());
 
 
-        this.url_list_file = ce.getURIList();
-        this.html_dump_directory = ce.getHTDocsDumpDir();
+        this.url_list_file = ce.getURIListResolved();
+        log.error("URI list file: " + this.url_list_file);
+
+        this.html_dump_directory = ce.getHTDocsDumpDirResolved();
+        log.error("HTDocs Dump Dir: " + this.html_dump_directory);
 
         robot = new RobotExclusion(ce.getUserAgent());
 
-        String robots_file = ce.getRobotsFile();
+        String robots_file = ce.getRobotsFileResolved();
+        log.error("Robots File: " + robots_file);
         String robots_domain = ce.getRobotsDomain();
         if (robots_file != null && robots_domain != null) {
             log.debug(robots_file + " " + robots_domain);
-            robot.addLocalEntries(robots_domain, new File(ce.resolvePath(robots_file)));
+            robot.addLocalEntries(robots_domain, new File(robots_file));
         }
     }
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org