You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/11 14:50:27 UTC
svn commit: r713054 - in /incubator/droids/trunk/droids-norobots/src:
main/java/org/apache/droids/norobots/NoRobotClient.java
test/java/org/apache/droids/norobots/TestNorobotsClient.java
Author: olegk
Date: Tue Nov 11 06:50:27 2008
New Revision: 713054
URL: http://svn.apache.org/viewvc?rev=713054&view=rev
Log:
Make sure baseURI and robotsURI are always correctly initialized
Modified:
incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java
incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java
Modified: incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java?rev=713054&r1=713053&r2=713054&view=diff
==============================================================================
--- incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java (original)
+++ incubator/droids/trunk/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java Tue Nov 11 06:50:27 2008
@@ -93,37 +93,32 @@
* @param baseUri base URI of the site
*/
public void parse(URI baseUri) throws IOException, NoRobotException {
- URI uri;
- try {
- uri = baseUri.resolve(new URI("robots.txt"));
- } catch (URISyntaxException ex) {
- throw new NoRobotException("Invalid URI", ex);
- }
+ URI uri = resolveURI(baseUri, "robots.txt");
// fetch "robots.txt" resolved against baseUri
if (!contentLoader.exists(uri)) {
return;
}
InputStream instream = contentLoader.load(uri);
- parseText(instream);
- robotsURI = uri;
+ doParseText(instream);
baseURI = baseUri;
+ robotsURI = uri;
}
- public void parseText(InputStream instream) throws IOException {
- baseURI = null;
- robotsURI = null;
- try {
- Map<String, RulesEngine> map = doParse(instream);
- this.rules = map.get(this.userAgent);
- if (this.rules == null) {
- this.rules = new RulesEngine();
- }
- this.wildcardRules = map.get("*");
- if (this.wildcardRules == null) {
- this.wildcardRules = new RulesEngine();
- }
- } finally {
- instream.close();
+ public void parseText(InputStream instream) throws IOException, NoRobotException {
+ doParseText(instream);
+ baseURI = createURI("/");
+ robotsURI = resolveURI(baseURI, "robots.txt");
+ }
+
+ private void doParseText(InputStream instream) throws IOException {
+ Map<String, RulesEngine> map = parse(instream);
+ this.rules = map.get(this.userAgent);
+ if (this.rules == null) {
+ this.rules = new RulesEngine();
+ }
+ this.wildcardRules = map.get("*");
+ if (this.wildcardRules == null) {
+ this.wildcardRules = new RulesEngine();
}
}
@@ -218,32 +213,28 @@
* @throws IllegalStateException when parse has not been called
*/
public boolean isUrlAllowed(URI uri) throws IllegalStateException, IllegalArgumentException {
- if(rules == null) {
+ if(rules == null || baseURI == null || robotsURI == null) {
throw new IllegalStateException("You must call parse before you call this method. ");
}
- if (baseURI != null &&
- (!equals(baseURI.getHost(), uri.getHost()) ||
- baseURI.getPort() != uri.getPort() ||
- !equals(baseURI.getScheme(), uri.getScheme())))
+ if (!equals(baseURI.getHost(), uri.getHost()) ||
+ baseURI.getPort() != uri.getPort() ||
+ !equals(baseURI.getScheme(), uri.getScheme()))
{
throw new IllegalArgumentException(
"Illegal to use a different url, " + uri.toString() +
", for this robots.txt: " + baseURI.toString());
}
-
if (uri.equals(robotsURI)) {
return true;
}
String path = uri.getPath();
- if (baseURI != null) {
- String basepath = baseURI.getPath();
- if (path.startsWith(basepath)) {
- path = path.substring(basepath.length());
- if (!path.startsWith("/")) {
- path = "/" + path;
- }
+ String basepath = baseURI.getPath();
+ if (path.startsWith(basepath)) {
+ path = path.substring(basepath.length());
+ if (!path.startsWith("/")) {
+ path = "/" + path;
}
}
@@ -264,9 +255,28 @@
return allowed.booleanValue();
}
+
+ /*
+ * Utility methods.
+ */
+ private static URI createURI(String s) throws NoRobotException {
+ try {
+ return new URI(s);
+ } catch (URISyntaxException ex) {
+ throw new NoRobotException("Invalid URI: " + ex.getInput());
+ }
+ }
+
+ private static URI resolveURI(URI base, String s) throws NoRobotException {
+ try {
+ return base.resolve(new URI(s));
+ } catch (URISyntaxException ex) {
+ throw new NoRobotException("Invalid URI: " + ex.getInput());
+ }
+ }
+
private static boolean equals(final Object obj1, final Object obj2) {
return obj1 == null ? obj2 == null : obj1.equals(obj2);
}
-
}
Modified: incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java?rev=713054&r1=713053&r2=713054&view=diff
==============================================================================
--- incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java (original)
+++ incubator/droids/trunk/droids-norobots/src/test/java/org/apache/droids/norobots/TestNorobotsClient.java Tue Nov 11 06:50:27 2008
@@ -1,10 +1,8 @@
package org.apache.droids.norobots;
import java.io.ByteArrayInputStream;
-import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
-import java.net.URL;
import java.util.Map;
import junit.framework.Assert;