You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/11 14:50:27 UTC

svn commit: r713054 - in /incubator/droids/trunk/droids-norobots/src: main/java/org/apache/droids/norobots/NoRobotClient.java test/java/org/apache/droids/norobots/TestNorobotsClient.java

Author: olegk
Date: Tue Nov 11 06:50:27 2008
New Revision: 713054

URL: http://svn.apache.org/viewvc?rev=713054&view=rev
Log:
Make sure baseURI and robotsURI are always correctly initialized

Modified:
    incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java
    incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java

Modified: incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java?rev=713054&r1=713053&r2=713054&view=diff
==============================================================================
--- incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java (original)
+++ incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java Tue Nov 11 06:50:27 2008
@@ -93,37 +93,32 @@
    * @param baseUrl of the site
    */
   public void parse(URI baseUri) throws IOException, NoRobotException {
-    URI uri;
-    try {
-      uri = baseUri.resolve(new URI("robots.txt"));
-    } catch (URISyntaxException ex) {
-      throw new NoRobotException("Invalid URI", ex);
-    }
+    URI uri = resolveURI(baseUri, "robots.txt");
     // fetch baseUrl+"robots.txt"
     if (!contentLoader.exists(uri)) {
       return;
     }
     InputStream instream = contentLoader.load(uri);
-    parseText(instream);
-    robotsURI = uri;
+    doParseText(instream);
     baseURI = baseUri;
+    robotsURI = uri;
   }
 
-  public void parseText(InputStream instream) throws IOException {
-    baseURI = null;
-    robotsURI = null;
-    try {
-      Map<String, RulesEngine> map = doParse(instream);
-      this.rules = map.get(this.userAgent);
-      if (this.rules == null) {
-        this.rules = new RulesEngine();
-      }
-      this.wildcardRules = map.get("*");
-      if (this.wildcardRules == null) {
-        this.wildcardRules = new RulesEngine();
-      }
-    } finally {
-      instream.close();
+  public void parseText(InputStream instream) throws IOException, NoRobotException {
+    doParseText(instream);
+    baseURI = createURI("/");
+    robotsURI = resolveURI(baseURI, "robots.txt");
+  }
+
+  private void doParseText(InputStream instream) throws IOException {
+    Map<String, RulesEngine> map = parse(instream);
+    this.rules = map.get(this.userAgent);
+    if (this.rules == null) {
+      this.rules = new RulesEngine();
+    }
+    this.wildcardRules = map.get("*");
+    if (this.wildcardRules == null) {
+      this.wildcardRules = new RulesEngine();
     }
   }
 
@@ -218,32 +213,28 @@
    * @throws IllegalStateException when parse has not been called
    */
   public boolean isUrlAllowed(URI uri) throws IllegalStateException, IllegalArgumentException {
-    if(rules == null) {
+    if(rules == null || baseURI == null || robotsURI == null) {
       throw new IllegalStateException("You must call parse before you call this method.  ");
     }
 
-    if (baseURI != null && 
-        (!equals(baseURI.getHost(), uri.getHost()) ||
-         baseURI.getPort() != uri.getPort() ||
-        !equals(baseURI.getScheme(), uri.getScheme())))
+    if (!equals(baseURI.getHost(), uri.getHost()) ||
+        baseURI.getPort() != uri.getPort() ||
+        !equals(baseURI.getScheme(), uri.getScheme()))
     {
       throw new IllegalArgumentException(
           "Illegal to use a different url, " + uri.toString() + 
           ",  for this robots.txt: " + baseURI.toString());
     }
-    
     if (uri.equals(robotsURI)) {
       return true;
     }
     
     String path = uri.getPath();
-    if (baseURI != null) {
-      String basepath = baseURI.getPath();
-      if (path.startsWith(basepath)) {
-        path = path.substring(basepath.length());
-        if (!path.startsWith("/")) {
-          path = "/" + path;
-        }
+    String basepath = baseURI.getPath();
+    if (path.startsWith(basepath)) {
+      path = path.substring(basepath.length());
+      if (!path.startsWith("/")) {
+        path = "/" + path;
       }
     }
     
@@ -264,9 +255,28 @@
     return allowed.booleanValue();
   }
 
+  
+  /*
+   * Utility methods. 
+   */
+  private static URI createURI(String s) throws NoRobotException {
+    try {
+      return new URI(s);
+    } catch (URISyntaxException ex) {
+      throw new NoRobotException("Invalid URI: " + ex.getInput());
+    }
+  }
+  
+  private static URI resolveURI(URI base, String s) throws NoRobotException {
+    try {
+      return base.resolve(new URI(s));
+    } catch (URISyntaxException ex) {
+      throw new NoRobotException("Invalid URI: " + ex.getInput());
+    }
+  }
+  
   private static boolean equals(final Object obj1, final Object obj2) {
     return obj1 == null ? obj2 == null : obj1.equals(obj2);
   }
-
   
 }

Modified: incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java?rev=713054&r1=713053&r2=713054&view=diff
==============================================================================
--- incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java (original)
+++ incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java Tue Nov 11 06:50:27 2008
@@ -1,10 +1,8 @@
 package org.apache.droids.norobots;
 
 import java.io.ByteArrayInputStream;
-import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
-import java.net.URL;
 import java.util.Map;
 
 import junit.framework.Assert;