You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by to...@apache.org on 2012/12/18 08:48:00 UTC
svn commit: r1423339 [2/5] - in /incubator/droids/branches/0.2.x-cleanup:
droids-crawler/ droids-crawler/src/main/java/org/apache/droids/crawler/
droids-crawler/src/main/java/org/apache/droids/protocol/http/
droids-crawler/src/test/java/org/apache/droi...
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/DroidsFactory.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/DroidsFactory.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/DroidsFactory.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/DroidsFactory.java Tue Dec 18 08:47:39 2012
@@ -1,8 +1,3 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
@@ -42,127 +37,125 @@ import org.apache.droids.robot.crawler.R
import org.apache.droids.tika.TikaDocumentParser;
/**
- *
* Helper class for creating defaults.
- *
*/
public class DroidsFactory {
- public static ParserFactory createDefaultParserFactory() {
- ParserFactory parserFactory = new ParserFactory();
- TikaDocumentParser tikaParser = new TikaDocumentParser();
- parserFactory.getMap().put("text/html", tikaParser);
- return parserFactory;
- }
-
- public static ProtocolFactory createDefaultProtocolFactory() {
- ProtocolFactory protocolFactory = new ProtocolFactory();
- HttpProtocol httpProtocol = new HttpProtocol();
- httpProtocol.setForceAllow(true);
-
- protocolFactory.getMap().put("http", httpProtocol);
- return protocolFactory;
- }
-
- public static URLFiltersFactory createDefaultURLFiltersFactory() {
- URLFiltersFactory filtersFactory = new URLFiltersFactory();
- URLFilter defaultURLFilter = new URLFilter() {
-
- public String filter(String urlString) {
- return urlString;
- }
-
- };
- filtersFactory.getMap().put("default", defaultURLFilter);
- return filtersFactory;
- }
-
- public static HandlerFactory createDefaultHandlerFactory(
- Handler defaultHandler) {
- HandlerFactory handlerFactory = new HandlerFactory();
- handlerFactory.getMap().put("default", defaultHandler);
- return handlerFactory;
- }
-
- public static Droid<Link> createSimpleSaveCrawlingDroid(String targetURI) {
- ParserFactory parserFactory = createDefaultParserFactory();
- ProtocolFactory protocolFactory = createDefaultProtocolFactory();
- URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
-
- SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
- simpleDelayTimer.setDelayMillis(100);
-
- SimpleTaskQueueWithHistory<Link> simpleQueue = new SimpleTaskQueueWithHistory<Link>();
-
- SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
- taskMaster.setDelayTimer(simpleDelayTimer);
- taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
-
- CrawlingDroid crawler = new SaveCrawlingDroid(simpleQueue, taskMaster,
- new SaveHandler());
- crawler.setFiltersFactory(filtersFactory);
- crawler.setParserFactory(parserFactory);
- crawler.setProtocolFactory(protocolFactory);
-
- Collection<String> initialLocations = new ArrayList<String>();
- initialLocations.add(targetURI);
- crawler.setInitialLocations(initialLocations);
- return crawler;
- }
-
- public static Droid<Link> createSimpleReportCrawlingDroid(String targetURI) {
- ParserFactory parserFactory = createDefaultParserFactory();
- ProtocolFactory protocolFactory = createDefaultProtocolFactory();
- URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
-
- SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
- simpleDelayTimer.setDelayMillis(100);
-
- SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
- // MultiThreadedTaskMaster<Link> taskMaster = new
- // MultiThreadedTaskMaster<Link>();
- taskMaster.setDelayTimer(simpleDelayTimer);
- taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
-
- Queue<Link> queue = new LinkedList<Link>();
-
- CrawlingDroid crawler = new ReportCrawlingDroid(queue, taskMaster);
- crawler.setFiltersFactory(filtersFactory);
- crawler.setParserFactory(parserFactory);
- crawler.setProtocolFactory(protocolFactory);
-
- Collection<String> initialLocations = new ArrayList<String>();
- initialLocations.add(targetURI);
- crawler.setInitialLocations(initialLocations);
- return crawler;
- }
-
- public static Droid<Link> createSimpleExceptionCrawlingDroid(
- String targetURI) {
- ParserFactory parserFactory = createDefaultParserFactory();
- ProtocolFactory protocolFactory = createDefaultProtocolFactory();
- URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
-
- SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
- simpleDelayTimer.setDelayMillis(100);
-
- Queue<Link> queue = new LinkedList<Link>();
-
- SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
- // MultiThreadedTaskMaster<Link> taskMaster = new
- // MultiThreadedTaskMaster<Link>();
- taskMaster.setDelayTimer(simpleDelayTimer);
- taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
-
- CrawlingDroid crawler = new ExceptionCrawlingDroid(queue, taskMaster);
- crawler.setFiltersFactory(filtersFactory);
- crawler.setParserFactory(parserFactory);
- crawler.setProtocolFactory(protocolFactory);
-
- Collection<String> initialLocations = new ArrayList<String>();
- initialLocations.add(targetURI);
- crawler.setInitialLocations(initialLocations);
- return crawler;
- }
+ public static ParserFactory createDefaultParserFactory() {
+ ParserFactory parserFactory = new ParserFactory();
+ TikaDocumentParser tikaParser = new TikaDocumentParser();
+ parserFactory.getMap().put("text/html", tikaParser);
+ return parserFactory;
+ }
+
+ public static ProtocolFactory createDefaultProtocolFactory() {
+ ProtocolFactory protocolFactory = new ProtocolFactory();
+ HttpProtocol httpProtocol = new HttpProtocol();
+ httpProtocol.setForceAllow(true);
+
+ protocolFactory.getMap().put("http", httpProtocol);
+ return protocolFactory;
+ }
+
+ public static URLFiltersFactory createDefaultURLFiltersFactory() {
+ URLFiltersFactory filtersFactory = new URLFiltersFactory();
+ URLFilter defaultURLFilter = new URLFilter() {
+
+ public String filter(String urlString) {
+ return urlString;
+ }
+
+ };
+ filtersFactory.getMap().put("default", defaultURLFilter);
+ return filtersFactory;
+ }
+
+ public static HandlerFactory createDefaultHandlerFactory(
+ Handler defaultHandler) {
+ HandlerFactory handlerFactory = new HandlerFactory();
+ handlerFactory.getMap().put("default", defaultHandler);
+ return handlerFactory;
+ }
+
+ public static Droid<Link> createSimpleSaveCrawlingDroid(String targetURI) {
+ ParserFactory parserFactory = createDefaultParserFactory();
+ ProtocolFactory protocolFactory = createDefaultProtocolFactory();
+ URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
+
+ SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
+ simpleDelayTimer.setDelayMillis(100);
+
+ SimpleTaskQueueWithHistory<Link> simpleQueue = new SimpleTaskQueueWithHistory<Link>();
+
+ SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
+ taskMaster.setDelayTimer(simpleDelayTimer);
+ taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
+
+ CrawlingDroid crawler = new SaveCrawlingDroid(simpleQueue, taskMaster,
+ new SaveHandler());
+ crawler.setFiltersFactory(filtersFactory);
+ crawler.setParserFactory(parserFactory);
+ crawler.setProtocolFactory(protocolFactory);
+
+ Collection<String> initialLocations = new ArrayList<String>();
+ initialLocations.add(targetURI);
+ crawler.setInitialLocations(initialLocations);
+ return crawler;
+ }
+
+ public static Droid<Link> createSimpleReportCrawlingDroid(String targetURI) {
+ ParserFactory parserFactory = createDefaultParserFactory();
+ ProtocolFactory protocolFactory = createDefaultProtocolFactory();
+ URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
+
+ SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
+ simpleDelayTimer.setDelayMillis(100);
+
+ SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
+ // MultiThreadedTaskMaster<Link> taskMaster = new
+ // MultiThreadedTaskMaster<Link>();
+ taskMaster.setDelayTimer(simpleDelayTimer);
+ taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
+
+ Queue<Link> queue = new LinkedList<Link>();
+
+ CrawlingDroid crawler = new ReportCrawlingDroid(queue, taskMaster);
+ crawler.setFiltersFactory(filtersFactory);
+ crawler.setParserFactory(parserFactory);
+ crawler.setProtocolFactory(protocolFactory);
+
+ Collection<String> initialLocations = new ArrayList<String>();
+ initialLocations.add(targetURI);
+ crawler.setInitialLocations(initialLocations);
+ return crawler;
+ }
+
+ public static Droid<Link> createSimpleExceptionCrawlingDroid(
+ String targetURI) {
+ ParserFactory parserFactory = createDefaultParserFactory();
+ ProtocolFactory protocolFactory = createDefaultProtocolFactory();
+ URLFiltersFactory filtersFactory = createDefaultURLFiltersFactory();
+
+ SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
+ simpleDelayTimer.setDelayMillis(100);
+
+ Queue<Link> queue = new LinkedList<Link>();
+
+ SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
+ // MultiThreadedTaskMaster<Link> taskMaster = new
+ // MultiThreadedTaskMaster<Link>();
+ taskMaster.setDelayTimer(simpleDelayTimer);
+ taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
+
+ CrawlingDroid crawler = new ExceptionCrawlingDroid(queue, taskMaster);
+ crawler.setFiltersFactory(filtersFactory);
+ crawler.setParserFactory(parserFactory);
+ crawler.setProtocolFactory(protocolFactory);
+
+ Collection<String> initialLocations = new ArrayList<String>();
+ initialLocations.add(targetURI);
+ crawler.setInitialLocations(initialLocations);
+ return crawler;
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/ExceptionCrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/ExceptionCrawlingDroid.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/ExceptionCrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/ExceptionCrawlingDroid.java Tue Dec 18 08:47:39 2012
@@ -19,6 +19,7 @@
package org.apache.droids.examples;
import java.util.Queue;
+
import org.apache.droids.api.Handler;
import org.apache.droids.api.Link;
import org.apache.droids.api.TaskMaster;
@@ -29,17 +30,17 @@ import org.apache.droids.robot.crawler.C
public class ExceptionCrawlingDroid extends CrawlingDroid {
- public ExceptionCrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster) {
- super(queue, taskMaster);
- }
-
- @Override
- public Worker<Link> getNewWorker() {
- final CrawlingWorker worker = new CrawlingWorker(this);
- Handler testHandler = new ExceptionReportHandler();
- worker.setHandlerFactory(DroidsFactory
- .createDefaultHandlerFactory(testHandler));
- return worker;
- }
+ public ExceptionCrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster) {
+ super(queue, taskMaster);
+ }
+
+ @Override
+ public Worker<Link> getNewWorker() {
+ final CrawlingWorker worker = new CrawlingWorker(this);
+ Handler testHandler = new ExceptionReportHandler();
+ worker.setHandlerFactory(DroidsFactory
+ .createDefaultHandlerFactory(testHandler));
+ return worker;
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/FileRenameDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/FileRenameDroid.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/FileRenameDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/FileRenameDroid.java Tue Dec 18 08:47:39 2012
@@ -22,6 +22,7 @@ import java.util.Collection;
import java.util.LinkedHashMap;
import com.google.common.base.Preconditions;
+
import java.util.LinkedList;
import java.util.Queue;
@@ -34,102 +35,102 @@ import org.slf4j.LoggerFactory;
public class FileRenameDroid extends AbstractDroid<FileTask> {
- private static final Logger LOG = LoggerFactory
- .getLogger(FileRenameDroid.class);
- private Collection<File> initialFiles;
-
- public FileRenameDroid(Queue<FileTask> queue, TaskMaster<FileTask> taskMaster) {
- super(queue, taskMaster);
- }
-
- public void setInitialFiles(Collection<File> initialFiles) {
- this.initialFiles = initialFiles;
- }
-
- public void init() {
- Preconditions.checkNotNull(initialFiles);
- Preconditions.checkState(!initialFiles.isEmpty());
- for (File file : initialFiles) {
- queue.add(new FileTask(file, 0));
- }
- }
-
- public LinkedHashMap<String, String> cleaner = null;
-
- public LinkedHashMap<String, String> getCleaner() {
- if (null == cleaner) {
- populateCleaner();
- }
- return cleaner;
- }
-
- public void setCleaner(LinkedHashMap<String, String> cleaner) {
- this.cleaner = cleaner;
- }
-
- private void populateCleaner() {
- cleaner = new LinkedHashMap<String, String>();
- cleaner.put(" ", ".");
- cleaner.put(".-.", ".");
- cleaner.put(",", "");
- }
-
- public void finished() {
- System.out.println("FINISHED!!!");
- }
-
- public RenameWorker getNewWorker() {
- return new RenameWorker();
- }
-
- public class RenameWorker implements Worker<FileTask> {
-
- String replace;
-
- public void execute(FileTask task) {
-
- for (String pattern : getCleaner().keySet()) {
- replace = getCleaner().get(pattern);
-
- cleanFileName(task.getFile(), pattern, replace);
- }
- }
-
- private void cleanFileName(File file, String pattern, String replace) {
- LOG.debug("Processing: " + file.getName());
- LOG.debug("finding pattern: " + pattern);
- LOG.debug("replacing it with: " + replace);
- String fileName = file.getName();
- if (fileName.contains(pattern)
- || !fileName.toLowerCase().equals(fileName)) {
- LOG.debug("need to process this file: " + fileName + " in "
- + file.getAbsolutePath());
- File replacement = new File(fileName.substring(0, file
- .getAbsolutePath().indexOf(fileName))
- + fileName.replaceAll(pattern, replace).toLowerCase());
- LOG.debug("Renaming to: " + replacement.getName() + " in "
- + replacement.getAbsolutePath());
-
- LOG.info("TODO! actually do the rename!");
- // TODO -- actually do the rename...file.renameTo(replacement);
- }
- }
- }
-
- // ------------------------------------------------------------------
- // ------------------------------------------------------------------
- public static void main(String[] args) {
- MultiThreadedTaskMaster<FileTask> taskMaster = new MultiThreadedTaskMaster<FileTask>();
- taskMaster.setPoolSize(3);
-
- Queue<FileTask> queue = new LinkedList<FileTask>();
-
- Collection<File> files = new ArrayList<File>();
- files.add(new File(args[0]));
-
- FileRenameDroid simple = new FileRenameDroid(queue, taskMaster);
- simple.setInitialFiles(files);
- simple.init();
- simple.start(); // TODO? perhaps start internally calls init()?
- }
+ private static final Logger LOG = LoggerFactory
+ .getLogger(FileRenameDroid.class);
+ private Collection<File> initialFiles;
+
+ public FileRenameDroid(Queue<FileTask> queue, TaskMaster<FileTask> taskMaster) {
+ super(queue, taskMaster);
+ }
+
+ public void setInitialFiles(Collection<File> initialFiles) {
+ this.initialFiles = initialFiles;
+ }
+
+ public void init() {
+ Preconditions.checkNotNull(initialFiles);
+ Preconditions.checkState(!initialFiles.isEmpty());
+ for (File file : initialFiles) {
+ queue.add(new FileTask(file, 0));
+ }
+ }
+
+ public LinkedHashMap<String, String> cleaner = null;
+
+ public LinkedHashMap<String, String> getCleaner() {
+ if (null == cleaner) {
+ populateCleaner();
+ }
+ return cleaner;
+ }
+
+ public void setCleaner(LinkedHashMap<String, String> cleaner) {
+ this.cleaner = cleaner;
+ }
+
+ private void populateCleaner() {
+ cleaner = new LinkedHashMap<String, String>();
+ cleaner.put(" ", ".");
+ cleaner.put(".-.", ".");
+ cleaner.put(",", "");
+ }
+
+ public void finished() {
+ System.out.println("FINISHED!!!");
+ }
+
+ public RenameWorker getNewWorker() {
+ return new RenameWorker();
+ }
+
+ public class RenameWorker implements Worker<FileTask> {
+
+ String replace;
+
+ public void execute(FileTask task) {
+
+ for (String pattern : getCleaner().keySet()) {
+ replace = getCleaner().get(pattern);
+
+ cleanFileName(task.getFile(), pattern, replace);
+ }
+ }
+
+ private void cleanFileName(File file, String pattern, String replace) {
+ LOG.debug("Processing: " + file.getName());
+ LOG.debug("finding pattern: " + pattern);
+ LOG.debug("replacing it with: " + replace);
+ String fileName = file.getName();
+ if (fileName.contains(pattern)
+ || !fileName.toLowerCase().equals(fileName)) {
+ LOG.debug("need to process this file: " + fileName + " in "
+ + file.getAbsolutePath());
+ File replacement = new File(fileName.substring(0, file
+ .getAbsolutePath().indexOf(fileName))
+ + fileName.replaceAll(pattern, replace).toLowerCase());
+ LOG.debug("Renaming to: " + replacement.getName() + " in "
+ + replacement.getAbsolutePath());
+
+ LOG.info("TODO! actually do the rename!");
+ // TODO -- actually do the rename...file.renameTo(replacement);
+ }
+ }
+ }
+
+ // ------------------------------------------------------------------
+ // ------------------------------------------------------------------
+ public static void main(String[] args) {
+ MultiThreadedTaskMaster<FileTask> taskMaster = new MultiThreadedTaskMaster<FileTask>();
+ taskMaster.setPoolSize(3);
+
+ Queue<FileTask> queue = new LinkedList<FileTask>();
+
+ Collection<File> files = new ArrayList<File>();
+ files.add(new File(args[0]));
+
+ FileRenameDroid simple = new FileRenameDroid(queue, taskMaster);
+ simple.setInitialFiles(files);
+ simple.init();
+ simple.start(); // TODO? perhaps start internally calls init()?
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SaveCrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SaveCrawlingDroid.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SaveCrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SaveCrawlingDroid.java Tue Dec 18 08:47:39 2012
@@ -19,6 +19,7 @@
package org.apache.droids.examples;
import java.util.Queue;
+
import org.apache.droids.api.Handler;
import org.apache.droids.api.Link;
import org.apache.droids.api.TaskMaster;
@@ -27,23 +28,23 @@ import org.apache.droids.robot.crawler.C
import org.apache.droids.robot.crawler.CrawlingWorker;
public class SaveCrawlingDroid extends CrawlingDroid {
- private final Handler defaultHandler;
+ private final Handler defaultHandler;
+
+ public SaveCrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster,
+ final Handler defaultHandlerForWorkerCreation) {
+
+ super(queue, taskMaster);
- public SaveCrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster,
- final Handler defaultHandlerForWorkerCreation) {
-
- super(queue, taskMaster);
-
- assert (defaultHandlerForWorkerCreation != null);
- this.defaultHandler = defaultHandlerForWorkerCreation;
- }
-
- @Override
- public Worker<Link> getNewWorker() {
- final CrawlingWorker worker = new CrawlingWorker(this);
- worker.setHandlerFactory(DroidsFactory
- .createDefaultHandlerFactory(this.defaultHandler));
- return worker;
- }
+ assert (defaultHandlerForWorkerCreation != null);
+ this.defaultHandler = defaultHandlerForWorkerCreation;
+ }
+
+ @Override
+ public Worker<Link> getNewWorker() {
+ final CrawlingWorker worker = new CrawlingWorker(this);
+ worker.setHandlerFactory(DroidsFactory
+ .createDefaultHandlerFactory(this.defaultHandler));
+ return worker;
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SysoutCrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SysoutCrawlingDroid.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SysoutCrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/SysoutCrawlingDroid.java Tue Dec 18 08:47:39 2012
@@ -19,6 +19,7 @@
package org.apache.droids.examples;
import java.util.Queue;
+
import org.apache.droids.api.Link;
import org.apache.droids.api.TaskMaster;
import org.apache.droids.api.Worker;
@@ -28,16 +29,16 @@ import org.apache.droids.robot.crawler.C
public class SysoutCrawlingDroid extends CrawlingDroid {
- public SysoutCrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster) {
- super(queue, taskMaster);
- }
-
- @Override
- public Worker<Link> getNewWorker() {
- final CrawlingWorker worker = new CrawlingWorker(this);
- worker.setHandlerFactory(DroidsFactory
- .createDefaultHandlerFactory(new SysoutHandler()));
- return worker;
- }
+ public SysoutCrawlingDroid(Queue<Link> queue, TaskMaster<Link> taskMaster) {
+ super(queue, taskMaster);
+ }
+
+ @Override
+ public Worker<Link> getNewWorker() {
+ final CrawlingWorker worker = new CrawlingWorker(this);
+ worker.setHandlerFactory(DroidsFactory
+ .createDefaultHandlerFactory(new SysoutHandler()));
+ return worker;
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/cli/SimpleRuntime.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/cli/SimpleRuntime.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/cli/SimpleRuntime.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/cli/SimpleRuntime.java Tue Dec 18 08:47:39 2012
@@ -50,94 +50,93 @@ import org.apache.http.protocol.HTTP;
/**
* Simple Droids runtime that wires various components together in Java code
* without using a DI framework.
- *
*/
public class SimpleRuntime {
- private SimpleRuntime() {
- }
+ private SimpleRuntime() {
+ }
- public static void main(String[] args) throws Exception {
+ public static void main(String[] args) throws Exception {
- if (args.length < 1) {
- System.out.println("Please specify a URL to crawl");
- System.exit(-1);
- }
- String targetURL = args[0];
-
- // Create parser factory. Support basic HTML markup only
- ParserFactory parserFactory = new ParserFactory();
- TikaDocumentParser tikaParser = new TikaDocumentParser();
- parserFactory.getMap().put("text/html", tikaParser);
-
- // Create protocol factory. Support HTTP/S only.
- ProtocolFactory protocolFactory = new ProtocolFactory();
-
- // Create and configure HTTP client
- HttpParams params = new BasicHttpParams();
- HttpProtocolParamBean hppb = new HttpProtocolParamBean(params);
- HttpConnectionParamBean hcpb = new HttpConnectionParamBean(params);
- ConnManagerParamBean cmpb = new ConnManagerParamBean(params);
-
- // Set protocol parametes
- hppb.setVersion(HttpVersion.HTTP_1_1);
- hppb.setContentCharset(HTTP.ISO_8859_1);
- hppb.setUseExpectContinue(true);
- // Set connection parameters
- hcpb.setStaleCheckingEnabled(false);
- // Set connection manager parameters
- ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
- connPerRouteBean.setDefaultMaxPerRoute(2);
- cmpb.setConnectionsPerRoute(connPerRouteBean);
-
- DroidsHttpClient httpclient = new DroidsHttpClient(params);
-
- HttpProtocol httpProtocol = new HttpProtocol(httpclient);
- protocolFactory.getMap().put("http", httpProtocol);
- protocolFactory.getMap().put("https", httpProtocol);
-
- // Create URL filter factory.
- URLFiltersFactory filtersFactory = new URLFiltersFactory();
- RegexURLFilter defaultURLFilter = new RegexURLFilter();
- defaultURLFilter.setFile("classpath:/regex-urlfilter.txt");
- filtersFactory.getMap().put("default", defaultURLFilter);
-
- // Create handler factory. Provide sysout handler only.
- HandlerFactory handlerFactory = new HandlerFactory();
- SysoutHandler defaultHandler = new SysoutHandler();
- handlerFactory.getMap().put("default", defaultHandler);
-
- // Create droid factory. Leave it empty for now.
- DroidFactory<Link> droidFactory = new DroidFactory<Link>();
-
- // Create default droid
- SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
- simpleDelayTimer.setDelayMillis(100);
-
- Queue<Link> simpleQueue = new LinkedList<Link>();
-
- SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
- taskMaster.setDelayTimer(simpleDelayTimer);
- taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
-
- CrawlingDroid helloCrawler = new SysoutCrawlingDroid(simpleQueue,
- taskMaster);
- helloCrawler.setFiltersFactory(filtersFactory);
- helloCrawler.setParserFactory(parserFactory);
- helloCrawler.setProtocolFactory(protocolFactory);
-
- Collection<String> initialLocations = new ArrayList<String>();
- initialLocations.add(targetURL);
- helloCrawler.setInitialLocations(initialLocations);
-
- // Initialize and start the crawler
- helloCrawler.init();
- helloCrawler.start();
-
- // Await termination
- helloCrawler.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
- // Shut down the HTTP connection manager
- httpclient.getConnectionManager().shutdown();
- }
+ if (args.length < 1) {
+ System.out.println("Please specify a URL to crawl");
+ System.exit(-1);
+ }
+ String targetURL = args[0];
+
+ // Create parser factory. Support basic HTML markup only
+ ParserFactory parserFactory = new ParserFactory();
+ TikaDocumentParser tikaParser = new TikaDocumentParser();
+ parserFactory.getMap().put("text/html", tikaParser);
+
+ // Create protocol factory. Support HTTP/S only.
+ ProtocolFactory protocolFactory = new ProtocolFactory();
+
+ // Create and configure HTTP client
+ HttpParams params = new BasicHttpParams();
+ HttpProtocolParamBean hppb = new HttpProtocolParamBean(params);
+ HttpConnectionParamBean hcpb = new HttpConnectionParamBean(params);
+ ConnManagerParamBean cmpb = new ConnManagerParamBean(params);
+
+ // Set protocol parametes
+ hppb.setVersion(HttpVersion.HTTP_1_1);
+ hppb.setContentCharset(HTTP.ISO_8859_1);
+ hppb.setUseExpectContinue(true);
+ // Set connection parameters
+ hcpb.setStaleCheckingEnabled(false);
+ // Set connection manager parameters
+ ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
+ connPerRouteBean.setDefaultMaxPerRoute(2);
+ cmpb.setConnectionsPerRoute(connPerRouteBean);
+
+ DroidsHttpClient httpclient = new DroidsHttpClient(params);
+
+ HttpProtocol httpProtocol = new HttpProtocol(httpclient);
+ protocolFactory.getMap().put("http", httpProtocol);
+ protocolFactory.getMap().put("https", httpProtocol);
+
+ // Create URL filter factory.
+ URLFiltersFactory filtersFactory = new URLFiltersFactory();
+ RegexURLFilter defaultURLFilter = new RegexURLFilter();
+ defaultURLFilter.setFile("classpath:/regex-urlfilter.txt");
+ filtersFactory.getMap().put("default", defaultURLFilter);
+
+ // Create handler factory. Provide sysout handler only.
+ HandlerFactory handlerFactory = new HandlerFactory();
+ SysoutHandler defaultHandler = new SysoutHandler();
+ handlerFactory.getMap().put("default", defaultHandler);
+
+ // Create droid factory. Leave it empty for now.
+ DroidFactory<Link> droidFactory = new DroidFactory<Link>();
+
+ // Create default droid
+ SimpleDelayTimer simpleDelayTimer = new SimpleDelayTimer();
+ simpleDelayTimer.setDelayMillis(100);
+
+ Queue<Link> simpleQueue = new LinkedList<Link>();
+
+ SequentialTaskMaster<Link> taskMaster = new SequentialTaskMaster<Link>();
+ taskMaster.setDelayTimer(simpleDelayTimer);
+ taskMaster.setExceptionHandler(new DefaultTaskExceptionHandler());
+
+ CrawlingDroid helloCrawler = new SysoutCrawlingDroid(simpleQueue,
+ taskMaster);
+ helloCrawler.setFiltersFactory(filtersFactory);
+ helloCrawler.setParserFactory(parserFactory);
+ helloCrawler.setProtocolFactory(protocolFactory);
+
+ Collection<String> initialLocations = new ArrayList<String>();
+ initialLocations.add(targetURL);
+ helloCrawler.setInitialLocations(initialLocations);
+
+ // Initialize and start the crawler
+ helloCrawler.init();
+ helloCrawler.start();
+
+ // Await termination
+ helloCrawler.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
+ // Shut down the HTTP connection manager
+ httpclient.getConnectionManager().shutdown();
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/handler/ExceptionReportHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/handler/ExceptionReportHandler.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/handler/ExceptionReportHandler.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/examples/handler/ExceptionReportHandler.java Tue Dec 18 08:47:39 2012
@@ -27,14 +27,14 @@ import org.apache.droids.handle.ReportHa
public class ExceptionReportHandler extends ReportHandler {
- public ExceptionReportHandler() {
- super();
- }
+ public ExceptionReportHandler() {
+ super();
+ }
- @Override
- public void handle(URI uri, ContentEntity entity) throws IOException,
- DroidsException {
- super.handle(uri, entity);
- }
+ @Override
+ public void handle(URI uri, ContentEntity entity) throws IOException,
+ DroidsException {
+ super.handle(uri, entity);
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/LocalHttpServer.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/LocalHttpServer.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/LocalHttpServer.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/LocalHttpServer.java Tue Dec 18 08:47:39 2012
@@ -53,264 +53,255 @@ import org.slf4j.LoggerFactory;
/**
* Local HTTP server for tests that require one.
*/
-public class LocalHttpServer
-{
+public class LocalHttpServer {
- private final Logger log = LoggerFactory.getLogger(LocalHttpServer.class);
- /**
- * The local address to bind to. The host is an IP number rather than
- * "localhost" to avoid surprises on hosts that map "localhost" to an IPv6
- * address or something else. The port is 0 to let the system pick one.
- */
- public final static InetSocketAddress TEST_SERVER_ADDR = new InetSocketAddress("127.0.0.1", 0);
- /** The request handler registry. */
- private final HttpRequestHandlerRegistry handlerRegistry;
- /**
- * The HTTP processor. If the interceptors are thread safe and the list is not
- * modified during operation, the processor is thread safe.
- */
- private final BasicHttpProcessor httpProcessor;
- /** The server parameters. */
- private final HttpParams params;
- /** The server socket, while being served. */
- private volatile ServerSocket servicedSocket;
- /** The request listening thread, while listening. */
- private volatile Thread listenerThread;
- /** The number of connections this accepted. */
- private final AtomicInteger acceptedConnections = new AtomicInteger(0);
-
- /**
- * Creates a new test server.
- */
- public LocalHttpServer()
- {
- this.handlerRegistry = new HttpRequestHandlerRegistry();
- this.httpProcessor = new BasicHttpProcessor();
- this.httpProcessor.addInterceptor(new ResponseDate());
- this.httpProcessor.addInterceptor(new ResponseServer());
- this.httpProcessor.addInterceptor(new ResponseContent());
- this.httpProcessor.addInterceptor(new ResponseConnControl());
- this.params = new BasicHttpParams();
- this.params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 5000).setIntParameter(CoreConnectionPNames.SOCKET_BUFFER_SIZE, 8 * 1024).setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK, false).setBooleanParameter(CoreConnectionPNames.TCP_NODELAY, true).setParameter(CoreProtocolPNames.ORIGIN_SERVER, "LocalTestServer/1.1");
- }
-
- /**
- * Returns the number of connections this test server has accepted.
- */
- public int getAcceptedConnectionCount()
- {
- return this.acceptedConnections.get();
- }
-
- /**
- * Registers a handler with the local registry.
- *
- * @param pattern
- * the URL pattern to match
- * @param handler
- * the handler to apply
- */
- public void register(String pattern, HttpRequestHandler handler)
- {
- this.handlerRegistry.register(pattern, handler);
- }
-
- /**
- * Unregisters a handler from the local registry.
- *
- * @param pattern
- * the URL pattern
- */
- public void unregister(String pattern)
- {
- this.handlerRegistry.unregister(pattern);
- }
-
- /**
- * Starts this test server. Use {@link #getServicePort getServicePort} to
- * obtain the port number afterwards.
- */
- public void start() throws IOException
- {
- if (servicedSocket != null) {
- return; // Already running
- }
+ private final Logger log = LoggerFactory.getLogger(LocalHttpServer.class);
+ /**
+ * The local address to bind to. The host is an IP number rather than
+ * "localhost" to avoid surprises on hosts that map "localhost" to an IPv6
+ * address or something else. The port is 0 to let the system pick one.
+ */
+ public final static InetSocketAddress TEST_SERVER_ADDR = new InetSocketAddress("127.0.0.1", 0);
+ /**
+ * The request handler registry.
+ */
+ private final HttpRequestHandlerRegistry handlerRegistry;
+ /**
+ * The HTTP processor. If the interceptors are thread safe and the list is not
+ * modified during operation, the processor is thread safe.
+ */
+ private final BasicHttpProcessor httpProcessor;
+ /**
+ * The server parameters.
+ */
+ private final HttpParams params;
+ /**
+ * The server socket, while being served.
+ */
+ private volatile ServerSocket servicedSocket;
+ /**
+ * The request listening thread, while listening.
+ */
+ private volatile Thread listenerThread;
+ /**
+ * The number of connections this accepted.
+ */
+ private final AtomicInteger acceptedConnections = new AtomicInteger(0);
- ServerSocket ssock = new ServerSocket();
- ssock.setReuseAddress(true); // probably pointless for port '0'
- ssock.bind(TEST_SERVER_ADDR);
- this.servicedSocket = ssock;
-
- this.listenerThread = new Thread(new RequestListener());
- this.listenerThread.setDaemon(false);
- this.listenerThread.start();
- }
-
- /**
- * Stops this test server.
- */
- public void stop() throws IOException
- {
- if (this.servicedSocket == null) {
- return; // not running
+ /**
+ * Creates a new test server.
+ */
+ public LocalHttpServer() {
+ this.handlerRegistry = new HttpRequestHandlerRegistry();
+ this.httpProcessor = new BasicHttpProcessor();
+ this.httpProcessor.addInterceptor(new ResponseDate());
+ this.httpProcessor.addInterceptor(new ResponseServer());
+ this.httpProcessor.addInterceptor(new ResponseContent());
+ this.httpProcessor.addInterceptor(new ResponseConnControl());
+ this.params = new BasicHttpParams();
+ this.params.setIntParameter(CoreConnectionPNames.SO_TIMEOUT, 5000).setIntParameter(CoreConnectionPNames.SOCKET_BUFFER_SIZE, 8 * 1024).setBooleanParameter(CoreConnectionPNames.STALE_CONNECTION_CHECK, false).setBooleanParameter(CoreConnectionPNames.TCP_NODELAY, true).setParameter(CoreProtocolPNames.ORIGIN_SERVER, "LocalTestServer/1.1");
}
- try {
- this.servicedSocket.close();
- } catch (IOException ex) {
- log.error(ex.getMessage(), ex);
- } finally {
- this.servicedSocket = null;
+ /**
+ * Returns the number of connections this test server has accepted.
+ */
+ public int getAcceptedConnectionCount() {
+ return this.acceptedConnections.get();
}
- if (this.listenerThread != null) {
- this.listenerThread.interrupt();
- this.listenerThread = null;
+ /**
+ * Registers a handler with the local registry.
+ *
+ * @param pattern the URL pattern to match
+ * @param handler the handler to apply
+ */
+ public void register(String pattern, HttpRequestHandler handler) {
+ this.handlerRegistry.register(pattern, handler);
}
- }
- @Override
- public String toString()
- {
- ServerSocket ssock = servicedSocket; // avoid synchronization
- StringBuffer sb = new StringBuffer(80);
- sb.append("LocalTestServer/");
- if (ssock == null) {
- sb.append("stopped");
- } else {
- sb.append(ssock.getLocalSocketAddress());
+ /**
+ * Unregisters a handler from the local registry.
+ *
+ * @param pattern the URL pattern
+ */
+ public void unregister(String pattern) {
+ this.handlerRegistry.unregister(pattern);
}
- return sb.toString();
- }
- /**
- * Obtains the port this server is servicing.
- *
- * @return the service port
- */
- public int getServicePort()
- {
- ServerSocket ssock = this.servicedSocket; // avoid synchronization
- Preconditions.checkState(ssock != null, "not running");
- return ssock.getLocalPort();
- }
-
- /**
- * Obtains the local address the server is listening on
- *
- * @return the service address
- */
- public SocketAddress getServiceAddress()
- {
- ServerSocket ssock = this.servicedSocket; // avoid synchronization
- Preconditions.checkState(ssock != null, "not running");
- return ssock.getLocalSocketAddress();
- }
-
- /**
- * The request listener. Accepts incoming connections and launches a service
- * thread.
- */
- public class RequestListener implements Runnable
- {
-
- /** The workers launched from here. */
- private final Set<Thread> workerThreads;
-
- public RequestListener()
- {
- super();
- this.workerThreads = Collections.synchronizedSet(new HashSet<Thread>());
+ /**
+ * Starts this test server. Use {@link #getServicePort getServicePort} to
+ * obtain the port number afterwards.
+ */
+ public void start() throws IOException {
+ if (servicedSocket != null) {
+ return; // Already running
+ }
+
+ ServerSocket ssock = new ServerSocket();
+ ssock.setReuseAddress(true); // probably pointless for port '0'
+ ssock.bind(TEST_SERVER_ADDR);
+ this.servicedSocket = ssock;
+
+ this.listenerThread = new Thread(new RequestListener());
+ this.listenerThread.setDaemon(false);
+ this.listenerThread.start();
}
- public void run()
- {
- try {
- while ((servicedSocket != null) && (listenerThread == Thread.currentThread())
- && !Thread.interrupted()) {
- try {
- accept();
- } catch (Exception ex) {
- ServerSocket ssock = servicedSocket;
- if ((ssock != null) && !ssock.isClosed()) {
- log.error(LocalHttpServer.this.toString() + " could not accept", ex);
- }
- // otherwise ignore the exception silently
- break;
- }
+ /**
+ * Stops this test server.
+ */
+ public void stop() throws IOException {
+ if (this.servicedSocket == null) {
+ return; // not running
}
- } finally {
- cleanup();
- }
- }
- protected void accept() throws IOException
- {
- // Set up HTTP connection
- Socket socket = servicedSocket.accept();
- acceptedConnections.incrementAndGet();
- DefaultHttpServerConnection conn = new DefaultHttpServerConnection();
- conn.bind(socket, params);
-
- // Set up the HTTP service
- HttpService httpService = new HttpService(httpProcessor,
- new DefaultConnectionReuseStrategy(), new DefaultHttpResponseFactory());
- httpService.setParams(params);
- httpService.setHandlerResolver(handlerRegistry);
-
- // Start worker thread
- Thread t = new Thread(new Worker(httpService, conn));
- workerThreads.add(t);
- t.setDaemon(true);
- t.start();
+ try {
+ this.servicedSocket.close();
+ } catch (IOException ex) {
+ log.error(ex.getMessage(), ex);
+ } finally {
+ this.servicedSocket = null;
+ }
+ if (this.listenerThread != null) {
+ this.listenerThread.interrupt();
+ this.listenerThread = null;
+ }
}
- protected void cleanup()
- {
- Thread[] threads = workerThreads.toArray(new Thread[0]);
- for (int i = 0; i < threads.length; i++) {
- if (threads[i] != null) {
- threads[i].interrupt();
+ @Override
+ public String toString() {
+ ServerSocket ssock = servicedSocket; // avoid synchronization
+ StringBuffer sb = new StringBuffer(80);
+ sb.append("LocalTestServer/");
+ if (ssock == null) {
+ sb.append("stopped");
+ } else {
+ sb.append(ssock.getLocalSocketAddress());
}
- }
+ return sb.toString();
}
/**
- * A worker for serving incoming requests.
+ * Obtains the port this server is servicing.
+ *
+ * @return the service port
*/
- public class Worker implements Runnable
- {
+ public int getServicePort() {
+ ServerSocket ssock = this.servicedSocket; // avoid synchronization
+ Preconditions.checkState(ssock != null, "not running");
+ return ssock.getLocalPort();
+ }
- private final HttpService httpservice;
- private final HttpServerConnection conn;
+ /**
+ * Obtains the local address the server is listening on
+ *
+ * @return the service address
+ */
+ public SocketAddress getServiceAddress() {
+ ServerSocket ssock = this.servicedSocket; // avoid synchronization
+ Preconditions.checkState(ssock != null, "not running");
+ return ssock.getLocalSocketAddress();
+ }
- public Worker(final HttpService httpservice, final HttpServerConnection conn)
- {
+ /**
+ * The request listener. Accepts incoming connections and launches a service
+ * thread.
+ */
+ public class RequestListener implements Runnable {
- this.httpservice = httpservice;
- this.conn = conn;
- }
+ /**
+ * The workers launched from here.
+ */
+ private final Set<Thread> workerThreads;
+
+ public RequestListener() {
+ super();
+ this.workerThreads = Collections.synchronizedSet(new HashSet<Thread>());
+ }
- public void run()
- {
- HttpContext context = new BasicHttpContext(null);
- try {
- while ((servicedSocket != null) && this.conn.isOpen() && !Thread.interrupted()) {
- this.httpservice.handleRequest(this.conn, context);
- }
- } catch (IOException ex) {
- // ignore silently
- } catch (HttpException ex) {
- // ignore silently
- } finally {
- workerThreads.remove(Thread.currentThread());
- try {
- this.conn.shutdown();
- } catch (IOException ignore) {
- }
+ public void run() {
+ try {
+ while ((servicedSocket != null) && (listenerThread == Thread.currentThread())
+ && !Thread.interrupted()) {
+ try {
+ accept();
+ } catch (Exception ex) {
+ ServerSocket ssock = servicedSocket;
+ if ((ssock != null) && !ssock.isClosed()) {
+ log.error(LocalHttpServer.this.toString() + " could not accept", ex);
+ }
+ // otherwise ignore the exception silently
+ break;
+ }
+ }
+ } finally {
+ cleanup();
+ }
+ }
+
+ protected void accept() throws IOException {
+ // Set up HTTP connection
+ Socket socket = servicedSocket.accept();
+ acceptedConnections.incrementAndGet();
+ DefaultHttpServerConnection conn = new DefaultHttpServerConnection();
+ conn.bind(socket, params);
+
+ // Set up the HTTP service
+ HttpService httpService = new HttpService(httpProcessor,
+ new DefaultConnectionReuseStrategy(), new DefaultHttpResponseFactory());
+ httpService.setParams(params);
+ httpService.setHandlerResolver(handlerRegistry);
+
+ // Start worker thread
+ Thread t = new Thread(new Worker(httpService, conn));
+ workerThreads.add(t);
+ t.setDaemon(true);
+ t.start();
+
+ }
+
+ protected void cleanup() {
+ Thread[] threads = workerThreads.toArray(new Thread[0]);
+ for (int i = 0; i < threads.length; i++) {
+ if (threads[i] != null) {
+ threads[i].interrupt();
+ }
+ }
+ }
+
+ /**
+ * A worker for serving incoming requests.
+ */
+ public class Worker implements Runnable {
+
+ private final HttpService httpservice;
+ private final HttpServerConnection conn;
+
+ public Worker(final HttpService httpservice, final HttpServerConnection conn) {
+
+ this.httpservice = httpservice;
+ this.conn = conn;
+ }
+
+ public void run() {
+ HttpContext context = new BasicHttpContext(null);
+ try {
+ while ((servicedSocket != null) && this.conn.isOpen() && !Thread.interrupted()) {
+ this.httpservice.handleRequest(this.conn, context);
+ }
+ } catch (IOException ex) {
+ // ignore silently
+ } catch (HttpException ex) {
+ // ignore silently
+ } finally {
+ workerThreads.remove(Thread.currentThread());
+ try {
+ this.conn.shutdown();
+ } catch (IOException ignore) {
+ }
+ }
+ }
}
- }
}
- }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/ResourceHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/ResourceHandler.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/ResourceHandler.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/main/java/org/apache/droids/localserver/ResourceHandler.java Tue Dec 18 08:47:39 2012
@@ -34,41 +34,40 @@ import org.apache.http.protocol.HttpRequ
/**
* A handler that serves out a resource
*/
-public class ResourceHandler implements HttpRequestHandler
-{
+public class ResourceHandler implements HttpRequestHandler {
- public void handle(final HttpRequest request, final HttpResponse response,
- final HttpContext context) throws HttpException, IOException {
+ public void handle(final HttpRequest request, final HttpResponse response,
+ final HttpContext context) throws HttpException, IOException {
- String method = request.getRequestLine().getMethod().toUpperCase(Locale.ENGLISH);
- if (!"GET".equals(method) && !"HEAD".equals(method)) {
- throw new MethodNotSupportedException(method + " not supported by " + getClass().getName());
- }
- String requestURI = request.getRequestLine().getUri();
- String s = requestURI;
- if (!s.startsWith("/")) {
- s = "/" + s;
- }
- s = "resources" + s;
-
- ClassLoader cl = ResourceHandler.class.getClassLoader();
- URL resource = cl.getResource(s);
-
- if (resource != null) {
- InputStream instream = resource.openStream();
- InputStreamEntity entity = new InputStreamEntity(instream, -1);
- if (requestURI.endsWith("_html")) {
- entity.setContentType("text/html");
- entity.setChunked(true);
- }
- response.setEntity(entity);
-
- } else {
- response.setStatusCode(HttpStatus.SC_NOT_FOUND);
- StringEntity entity = new StringEntity(requestURI + " not found", "US-ASCII");
- entity.setContentType("text/html");
- response.setEntity(entity);
+ String method = request.getRequestLine().getMethod().toUpperCase(Locale.ENGLISH);
+ if (!"GET".equals(method) && !"HEAD".equals(method)) {
+ throw new MethodNotSupportedException(method + " not supported by " + getClass().getName());
+ }
+ String requestURI = request.getRequestLine().getUri();
+ String s = requestURI;
+ if (!s.startsWith("/")) {
+ s = "/" + s;
+ }
+ s = "resources" + s;
+
+ ClassLoader cl = ResourceHandler.class.getClassLoader();
+ URL resource = cl.getResource(s);
+
+ if (resource != null) {
+ InputStream instream = resource.openStream();
+ InputStreamEntity entity = new InputStreamEntity(instream, -1);
+ if (requestURI.endsWith("_html")) {
+ entity.setContentType("text/html");
+ entity.setChunked(true);
+ }
+ response.setEntity(entity);
+
+ } else {
+ response.setStatusCode(HttpStatus.SC_NOT_FOUND);
+ StringEntity entity = new StringEntity(requestURI + " not found", "US-ASCII");
+ entity.setContentType("text/html");
+ response.setEntity(entity);
+ }
}
- }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestCrawlingWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestCrawlingWorker.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestCrawlingWorker.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestCrawlingWorker.java Tue Dec 18 08:47:39 2012
@@ -42,68 +42,68 @@ import org.mockito.Mockito;
import org.mockito.internal.stubbing.defaultanswers.ReturnsMocks;
public class TestCrawlingWorker {
- CrawlingWorker instance;
- private TikaDocumentParser htmlParser;
+ CrawlingWorker instance;
+ private TikaDocumentParser htmlParser;
- @Before
- public void initialize() {
- final Queue<Link> queue = new LinkedList<Link>();
- final CrawlingDroid droid = createDroid(queue);
- instance = (CrawlingWorker) droid.getNewWorker();
- }
-
- private final CrawlingDroid createDroid(final Queue<Link> queue) {
- final CrawlingDroid droid = new SysoutCrawlingDroid(queue, null);
-
- final ProtocolFactory protocolFactory = DroidsFactory
- .createDefaultProtocolFactory();
- droid.setProtocolFactory(protocolFactory);
-
- final ParserFactory parserFactory = parserSetup();
- droid.setParserFactory(parserFactory);
- return droid;
- }
-
- private final ParserFactory parserSetup() {
- final ParserFactory parserFactory = new ParserFactory();
-
- htmlParser = Mockito.mock(TikaDocumentParser.class, new ReturnsMocks());
-
- parserFactory.getMap().put("text/html", htmlParser);
- return parserFactory;
- }
-
- //
- @After
- public void cleanup() {
- instance = null;
- htmlParser = null;
- }
-
- //
- @Test
- public void nothingHappens() {
- // Arrange
-
- // Act
-
- // Assert
- }
-
- // execute
- @Test
- public void execute_linkIsParsed() throws DroidsException, IOException,
- URISyntaxException {
- // Arrange
- final Link link = new LinkTask(null, new URI("http://www.google.com"),
- 1);
-
- // Act
- this.instance.execute(link);
-
- // Assert
- Mockito.verify(htmlParser).parse(Matchers.any(ContentEntity.class),
- Matchers.any(Link.class));
- }
+ @Before
+ public void initialize() {
+ final Queue<Link> queue = new LinkedList<Link>();
+ final CrawlingDroid droid = createDroid(queue);
+ instance = (CrawlingWorker) droid.getNewWorker();
+ }
+
+ private final CrawlingDroid createDroid(final Queue<Link> queue) {
+ final CrawlingDroid droid = new SysoutCrawlingDroid(queue, null);
+
+ final ProtocolFactory protocolFactory = DroidsFactory
+ .createDefaultProtocolFactory();
+ droid.setProtocolFactory(protocolFactory);
+
+ final ParserFactory parserFactory = parserSetup();
+ droid.setParserFactory(parserFactory);
+ return droid;
+ }
+
+ private final ParserFactory parserSetup() {
+ final ParserFactory parserFactory = new ParserFactory();
+
+ htmlParser = Mockito.mock(TikaDocumentParser.class, new ReturnsMocks());
+
+ parserFactory.getMap().put("text/html", htmlParser);
+ return parserFactory;
+ }
+
+ //
+ @After
+ public void cleanup() {
+ instance = null;
+ htmlParser = null;
+ }
+
+ //
+ @Test
+ public void nothingHappens() {
+ // Arrange
+
+ // Act
+
+ // Assert
+ }
+
+ // execute
+ @Test
+ public void execute_linkIsParsed() throws DroidsException, IOException,
+ URISyntaxException {
+ // Arrange
+ final Link link = new LinkTask(null, new URI("http://www.google.com"),
+ 1);
+
+ // Act
+ this.instance.execute(link);
+
+ // Assert
+ Mockito.verify(htmlParser).parse(Matchers.any(ContentEntity.class),
+ Matchers.any(Link.class));
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestSimpleDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestSimpleDroid.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestSimpleDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-examples/src/test/java/org/apache/droids/examples/TestSimpleDroid.java Tue Dec 18 08:47:39 2012
@@ -36,81 +36,81 @@ import org.junit.Test;
public class TestSimpleDroid {
- protected LocalHttpServer testserver;
+ protected LocalHttpServer testserver;
- @Before
- public void initializeLocalTestServer() {
- this.testserver = new LocalHttpServer();
- }
-
- @After
- public void shutdownLocalTestServer() throws IOException {
- this.testserver.stop();
- }
-
- @Test
- public void testBasicCrawling() throws Exception {
- this.testserver.register("*", new ResourceHandler());
- this.testserver.start();
-
- String baseURI = "http:/" + this.testserver.getServiceAddress();
- String targetURI = baseURI + "/start_html";
-
- Droid<Link> droid = DroidsFactory.createSimpleReportCrawlingDroid(targetURI);
-
- droid.init();
- droid.start();
-
- while (!droid.getTaskMaster().awaitTermination(250L, TimeUnit.MILLISECONDS))
- ;
-
- Assert.assertFalse(ReportHandler.getReport().isEmpty());
- Assert.assertEquals(5, ReportHandler.getReport().size());
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/start_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page1_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page2_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page3_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page4_html"));
-
- ReportHandler.recycle();
- }
-
- @Test
- public void testTerminateCrawlingOnException() throws Exception {
- this.testserver.register("*", new ResourceHandler());
- this.testserver.start();
-
- String baseURI = "http:/" + this.testserver.getServiceAddress();
- String targetURI = baseURI + "/start_html";
-
- Droid<Link> droid = DroidsFactory.createSimpleExceptionCrawlingDroid(targetURI);
-
- TaskMaster<Link> taskMaster = (TaskMaster<Link>) droid.getTaskMaster();
- taskMaster.setExceptionHandler(new TaskExceptionHandler() {
-
- public TaskExceptionResult handleException(Exception ex) {
- if (ex instanceof RuntimeException) {
- return TaskExceptionResult.FATAL;
- }
- return TaskExceptionResult.WARN;
- }
-
- });
-
- droid.init();
- droid.start();
- while (!droid.getTaskMaster().awaitTermination(250L, TimeUnit.MILLISECONDS))
- ;
-
- Assert.assertFalse(ReportHandler.getReport().isEmpty());
- Assert.assertEquals(5, ReportHandler.getReport().size());
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/start_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page1_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page2_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page3_html"));
- Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page4_html"));
+ @Before
+ public void initializeLocalTestServer() {
+ this.testserver = new LocalHttpServer();
+ }
+
+ @After
+ public void shutdownLocalTestServer() throws IOException {
+ this.testserver.stop();
+ }
+
+ @Test
+ public void testBasicCrawling() throws Exception {
+ this.testserver.register("*", new ResourceHandler());
+ this.testserver.start();
+
+ String baseURI = "http:/" + this.testserver.getServiceAddress();
+ String targetURI = baseURI + "/start_html";
+
+ Droid<Link> droid = DroidsFactory.createSimpleReportCrawlingDroid(targetURI);
+
+ droid.init();
+ droid.start();
+
+ while (!droid.getTaskMaster().awaitTermination(250L, TimeUnit.MILLISECONDS))
+ ;
+
+ Assert.assertFalse(ReportHandler.getReport().isEmpty());
+ Assert.assertEquals(5, ReportHandler.getReport().size());
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/start_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page1_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page2_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page3_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page4_html"));
+
+ ReportHandler.recycle();
+ }
+
+ @Test
+ public void testTerminateCrawlingOnException() throws Exception {
+ this.testserver.register("*", new ResourceHandler());
+ this.testserver.start();
+
+ String baseURI = "http:/" + this.testserver.getServiceAddress();
+ String targetURI = baseURI + "/start_html";
+
+ Droid<Link> droid = DroidsFactory.createSimpleExceptionCrawlingDroid(targetURI);
+
+ TaskMaster<Link> taskMaster = (TaskMaster<Link>) droid.getTaskMaster();
+ taskMaster.setExceptionHandler(new TaskExceptionHandler() {
+
+ public TaskExceptionResult handleException(Exception ex) {
+ if (ex instanceof RuntimeException) {
+ return TaskExceptionResult.FATAL;
+ }
+ return TaskExceptionResult.WARN;
+ }
+
+ });
+
+ droid.init();
+ droid.start();
+ while (!droid.getTaskMaster().awaitTermination(250L, TimeUnit.MILLISECONDS))
+ ;
+
+ Assert.assertFalse(ReportHandler.getReport().isEmpty());
+ Assert.assertEquals(5, ReportHandler.getReport().size());
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/start_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page1_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page2_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page3_html"));
+ Assert.assertTrue(ReportHandler.getReport().contains(baseURI + "/page4_html"));
- ReportHandler.recycle();
- }
+ ReportHandler.recycle();
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-norobots/pom.xml
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-norobots/pom.xml?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-norobots/pom.xml (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-norobots/pom.xml Tue Dec 18 08:47:39 2012
@@ -25,29 +25,30 @@
<http://www.apache.org />.
-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <artifactId>droids</artifactId>
- <groupId>org.apache.droids</groupId>
- <version>0.3.0-incubating-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
- <artifactId>droids-norobots</artifactId>
- <name>Apache Droids Norobots</name>
- <inceptionYear>2007</inceptionYear>
- <description>
- Apache Droids robots.txt parser
- </description>
- <packaging>jar</packaging>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>droids</artifactId>
+ <groupId>org.apache.droids</groupId>
+ <version>0.3.0-incubating-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+ <artifactId>droids-norobots</artifactId>
+ <name>Apache Droids Norobots</name>
+ <inceptionYear>2007</inceptionYear>
+ <description>
+ Apache Droids robots.txt parser
+ </description>
+ <packaging>jar</packaging>
+
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
- <dependencies>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>${junit.version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
-
</project>
Modified: incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AbstractRule.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AbstractRule.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AbstractRule.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AbstractRule.java Tue Dec 18 08:47:39 2012
@@ -27,27 +27,27 @@
package org.apache.droids.norobots;
/**
- * Provides implementation for the path property and a handy toString.
+ * Provides implementation for the path property and a handy toString.
*/
abstract class AbstractRule implements Rule {
- private String path;
+ private String path;
- public AbstractRule(String path) {
- this.path = path.trim();
- }
-
- /**
- * A url path snippet for which a rule exists
- */
- public String getPath() {
- return this.path;
- }
-
- public abstract Boolean isAllowed(String query);
-
- @Override
- public String toString() {
- return getClass().getName() + " on " + this.path;
- }
+ public AbstractRule(String path) {
+ this.path = path.trim();
+ }
+
+ /**
+ * A url path snippet for which a rule exists
+ */
+ public String getPath() {
+ return this.path;
+ }
+
+ public abstract Boolean isAllowed(String query);
+
+ @Override
+ public String toString() {
+ return getClass().getName() + " on " + this.path;
+ }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AllowedRule.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AllowedRule.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AllowedRule.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/AllowedRule.java Tue Dec 18 08:47:39 2012
@@ -27,27 +27,27 @@
package org.apache.droids.norobots;
/**
- * A norobots Allow: rule.
- * Any path which begins with the rule's path is
- * allowed.
+ * A norobots Allow: rule.
+ * Any path which begins with the rule's path is
+ * allowed.
*/
class AllowedRule extends AbstractRule {
- public AllowedRule(String path) {
- super(path);
- }
-
- @Override
- public Boolean isAllowed(String query) {
- if("".equals(super.getPath())) {
- // What does the spec say here? Until I know, I'll just ignore this.
- return null;
+ public AllowedRule(String path) {
+ super(path);
}
- boolean test = query.startsWith( super.getPath() );
- if(!test) {
- return null;
- } else {
- return Boolean.TRUE;
+
+ @Override
+ public Boolean isAllowed(String query) {
+ if ("".equals(super.getPath())) {
+ // What does the spec say here? Until I know, I'll just ignore this.
+ return null;
+ }
+ boolean test = query.startsWith(super.getPath());
+ if (!test) {
+ return null;
+ } else {
+ return Boolean.TRUE;
+ }
}
- }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/ContentLoader.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/ContentLoader.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/ContentLoader.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/ContentLoader.java Tue Dec 18 08:47:39 2012
@@ -33,11 +33,10 @@ import java.net.URI;
/**
* An abstract loader intended for retrieving content identified by a URI.
*/
-public interface ContentLoader
-{
+public interface ContentLoader {
- boolean exists(URI uri) throws IOException;
+ boolean exists(URI uri) throws IOException;
- InputStream load(URI uri) throws IOException;
+ InputStream load(URI uri) throws IOException;
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/DisallowedRule.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/DisallowedRule.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/DisallowedRule.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/DisallowedRule.java Tue Dec 18 08:47:39 2012
@@ -27,26 +27,26 @@
package org.apache.droids.norobots;
/**
- * A norobots Disallow: rule.
- * Any path which begins with the rule's path is
- * not allowed.
+ * A norobots Disallow: rule.
+ * Any path which begins with the rule's path is
+ * not allowed.
*/
class DisallowedRule extends AbstractRule {
- public DisallowedRule(String path) {
- super(path);
- }
-
- @Override
- public Boolean isAllowed(String query) {
- if("".equals(super.getPath())) {
- return Boolean.TRUE;
+ public DisallowedRule(String path) {
+ super(path);
}
- boolean test = query.startsWith( super.getPath() );
- if(!test) {
- return null;
- } else {
- return Boolean.FALSE;
+
+ @Override
+ public Boolean isAllowed(String query) {
+ if ("".equals(super.getPath())) {
+ return Boolean.TRUE;
+ }
+ boolean test = query.startsWith(super.getPath());
+ if (!test) {
+ return null;
+ } else {
+ return Boolean.FALSE;
+ }
}
- }
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java?rev=1423339&r1=1423338&r2=1423339&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-norobots/src/main/java/org/apache/droids/norobots/NoRobotClient.java Tue Dec 18 08:47:39 2012
@@ -41,245 +41,240 @@ import java.util.Map;
import java.util.Set;
/**
- * A Client which may be used to decide which urls on a website
- * may be looked at, according to the norobots specification
- * located at:
+ * A Client which may be used to decide which urls on a website
+ * may be looked at, according to the norobots specification
+ * located at:
* http://www.robotstxt.org/wc/norobots-rfc.html
*/
public class NoRobotClient {
- private static final String US_ASCII = "US-ASCII";
-
- private final ContentLoader contentLoader;
- private final String userAgent;
-
- private URI baseURI;
- private URI robotsURI;
- private RulesEngine rules;
- private RulesEngine wildcardRules;
-
- /**
- * Create a Client for a particular user-agent name and the given
- * {@link ContentLoader}.
- *
- * @param userAgent name for the robot
- */
- public NoRobotClient(ContentLoader contentLoader, String userAgent) {
- super();
- if (contentLoader == null) {
- throw new IllegalArgumentException("Content loader may not be null");
- }
- this.contentLoader = contentLoader;
- if (userAgent != null) {
- this.userAgent = userAgent.toLowerCase(Locale.ENGLISH);
- } else {
- this.userAgent = null;
- }
- }
-
- /**
- * Create a Client for a particular user-agent name.
- *
- * @param userAgent name for the robot
- */
- public NoRobotClient(String userAgent) {
- this(new SimpleContentLoader(), userAgent);
- }
-
- /**
- * Head to a website and suck in their robots.txt file.
- * Note that the URL passed in is for the website and does
- * not include the robots.txt file itself.
- *
- * @param baseUrl of the site
- */
- public void parse(URI baseUri) throws IOException, NoRobotException {
- URI uri = resolveURI(baseUri, "robots.txt");
- baseURI = baseUri;
- robotsURI = uri;
- rules = null;
- wildcardRules = null;
- // fetch baseUrl+"robots.txt"
- if (!contentLoader.exists(uri)) {
- return;
- }
- InputStream instream = contentLoader.load(uri);
- doParseText(instream);
- }
-
- public void parseText(InputStream instream) throws IOException, NoRobotException {
- doParseText(instream);
- baseURI = createURI("/");
- robotsURI = resolveURI(baseURI, "robots.txt");
- }
-
- private void doParseText(InputStream instream) throws IOException {
- Map<String, RulesEngine> map = parse(instream);
- this.rules = map.get(this.userAgent);
- if (this.rules == null) {
- this.rules = new RulesEngine();
- }
- this.wildcardRules = map.get("*");
- if (this.wildcardRules == null) {
- this.wildcardRules = new RulesEngine();
- }
- }
-
- public static Map<String, RulesEngine> parse(InputStream instream) throws IOException {
- try {
- return doParse(instream);
- } finally {
- instream.close();
- }
- }
-
- enum ParserState
- {
- USER_AGENT_DEF, ALLOW_DISALLOW_DEF
- }
-
- private static Map<String, RulesEngine> doParse(InputStream instream) throws IOException {
-
- Map<String, RulesEngine> map = new HashMap<String, RulesEngine>();
- // Classic basic parser style, read an element at a time,
- // changing a state variable [parsingAllowBlock]
-
- // take each line, one at a time
- BufferedReader rdr = new BufferedReader(new InputStreamReader(instream, US_ASCII));
-
- Set<RulesEngine> engines = new HashSet<RulesEngine>();
-
- ParserState state = ParserState.ALLOW_DISALLOW_DEF;
-
- String line = "";
- while( (line = rdr.readLine()) != null ) {
- // trim whitespace from either side
- line = line.trim();
-
- // ignore startsWith('#')
- if(line.startsWith("#")) {
- continue;
- }
-
- if(line.startsWith("User-agent:")) {
- if (state == ParserState.ALLOW_DISALLOW_DEF) {
- engines.clear();
- }
- state = ParserState.USER_AGENT_DEF;
- String userAgent = line.substring("User-agent:".length());
- userAgent = userAgent.trim().toLowerCase(Locale.ENGLISH);
- RulesEngine engine = map.get(userAgent);
- if (engine == null) {
- engine = new RulesEngine();
- map.put(userAgent, engine);
- }
- engines.add(engine);
- }
- else {
- if (engines.isEmpty()) {
- continue;
- }
- if(line.startsWith("Allow:")) {
- state = ParserState.ALLOW_DISALLOW_DEF;
- String value = line.substring("Allow:".length()).trim();
- value = URLDecoder.decode(value, US_ASCII);
- for (RulesEngine engine: engines) {
- engine.allowPath( value );
- }
- } else
- if(line.startsWith("Disallow:")) {
- state = ParserState.ALLOW_DISALLOW_DEF;
- String value = line.substring("Disallow:".length()).trim();
- value = URLDecoder.decode(value, US_ASCII);
- for (RulesEngine engine: engines) {
- engine.disallowPath( value );
- }
+ private static final String US_ASCII = "US-ASCII";
+
+ private final ContentLoader contentLoader;
+ private final String userAgent;
+
+ private URI baseURI;
+ private URI robotsURI;
+ private RulesEngine rules;
+ private RulesEngine wildcardRules;
+
+ /**
+ * Create a Client for a particular user-agent name and the given
+ * {@link ContentLoader}.
+ *
+ * @param userAgent name for the robot
+ */
+ public NoRobotClient(ContentLoader contentLoader, String userAgent) {
+ super();
+ if (contentLoader == null) {
+ throw new IllegalArgumentException("Content loader may not be null");
+ }
+ this.contentLoader = contentLoader;
+ if (userAgent != null) {
+ this.userAgent = userAgent.toLowerCase(Locale.ENGLISH);
} else {
- // ignore
- continue;
+ this.userAgent = null;
+ }
+ }
+
+ /**
+ * Create a Client for a particular user-agent name.
+ *
+ * @param userAgent name for the robot
+ */
+ public NoRobotClient(String userAgent) {
+ this(new SimpleContentLoader(), userAgent);
+ }
+
+ /**
+ * Head to a website and suck in their robots.txt file.
+ * Note that the URL passed in is for the website and does
+ * not include the robots.txt file itself.
+ *
+ * @param baseUri base URI of the site
+ */
+ public void parse(URI baseUri) throws IOException, NoRobotException {
+ URI uri = resolveURI(baseUri, "robots.txt");
+ baseURI = baseUri;
+ robotsURI = uri;
+ rules = null;
+ wildcardRules = null;
+ // fetch baseUrl+"robots.txt"
+ if (!contentLoader.exists(uri)) {
+ return;
+ }
+ InputStream instream = contentLoader.load(uri);
+ doParseText(instream);
+ }
+
+ public void parseText(InputStream instream) throws IOException, NoRobotException {
+ doParseText(instream);
+ baseURI = createURI("/");
+ robotsURI = resolveURI(baseURI, "robots.txt");
+ }
+
+ private void doParseText(InputStream instream) throws IOException {
+ Map<String, RulesEngine> map = parse(instream);
+ this.rules = map.get(this.userAgent);
+ if (this.rules == null) {
+ this.rules = new RulesEngine();
}
- }
+ this.wildcardRules = map.get("*");
+ if (this.wildcardRules == null) {
+ this.wildcardRules = new RulesEngine();
+ }
+ }
+
+ public static Map<String, RulesEngine> parse(InputStream instream) throws IOException {
+ try {
+ return doParse(instream);
+ } finally {
+ instream.close();
+ }
+ }
+
+ enum ParserState {
+ USER_AGENT_DEF, ALLOW_DISALLOW_DEF
+ }
+
+ private static Map<String, RulesEngine> doParse(InputStream instream) throws IOException {
+
+ Map<String, RulesEngine> map = new HashMap<String, RulesEngine>();
+ // Classic basic parser style, read a line at a time,
+ // changing a state variable [state]
+
+ // take each line, one at a time
+ BufferedReader rdr = new BufferedReader(new InputStreamReader(instream, US_ASCII));
+
+ Set<RulesEngine> engines = new HashSet<RulesEngine>();
+
+ ParserState state = ParserState.ALLOW_DISALLOW_DEF;
+
+ String line = "";
+ while ((line = rdr.readLine()) != null) {
+ // trim whitespace from either side
+ line = line.trim();
+
+ // ignore startsWith('#')
+ if (line.startsWith("#")) {
+ continue;
+ }
+
+ if (line.startsWith("User-agent:")) {
+ if (state == ParserState.ALLOW_DISALLOW_DEF) {
+ engines.clear();
+ }
+ state = ParserState.USER_AGENT_DEF;
+ String userAgent = line.substring("User-agent:".length());
+ userAgent = userAgent.trim().toLowerCase(Locale.ENGLISH);
+ RulesEngine engine = map.get(userAgent);
+ if (engine == null) {
+ engine = new RulesEngine();
+ map.put(userAgent, engine);
+ }
+ engines.add(engine);
+ } else {
+ if (engines.isEmpty()) {
+ continue;
+ }
+ if (line.startsWith("Allow:")) {
+ state = ParserState.ALLOW_DISALLOW_DEF;
+ String value = line.substring("Allow:".length()).trim();
+ value = URLDecoder.decode(value, US_ASCII);
+ for (RulesEngine engine : engines) {
+ engine.allowPath(value);
+ }
+ } else if (line.startsWith("Disallow:")) {
+ state = ParserState.ALLOW_DISALLOW_DEF;
+ String value = line.substring("Disallow:".length()).trim();
+ value = URLDecoder.decode(value, US_ASCII);
+ for (RulesEngine engine : engines) {
+ engine.disallowPath(value);
+ }
+ } else {
+ // ignore
+ continue;
+ }
+ }
+ }
+ return map;
+ }
+
+ /**
+ * Decide if the parsed website will allow this URL to be
+ * seen.
+ * <p/>
+ * Note that parse(URI) must be called before this method
+ * is called.
+ *
+ * @param uri the URI in question
+ * @return is the url allowed?
+ * @throws IllegalStateException when parse has not been called
+ */
+ public boolean isUrlAllowed(URI uri) throws IllegalStateException, IllegalArgumentException {
+ if (baseURI == null || robotsURI == null) {
+ throw new IllegalStateException("You must call parse before you call this method. ");
+ }
+
+ if (!equals(baseURI.getHost(), uri.getHost()) ||
+ baseURI.getPort() != uri.getPort() ||
+ !equals(baseURI.getScheme(), uri.getScheme())) {
+ throw new IllegalArgumentException(
+ "Illegal to use a different url, " + uri.toString() +
+ ", for this robots.txt: " + baseURI.toString());
+ }
+ if (uri.equals(robotsURI)) {
+ return true;
+ }
+
+ String path = uri.getPath();
+ String basepath = baseURI.getPath();
+ if (path.startsWith(basepath)) {
+ path = path.substring(basepath.length());
+ if (!path.startsWith("/")) {
+ path = "/" + path;
+ }
+ }
+
+ try {
+ path = URLDecoder.decode(path, US_ASCII);
+ } catch (UnsupportedEncodingException ex) {
+ // ASCII always supported
+ return false;
+ }
+ Boolean allowed = this.rules != null ? this.rules.isAllowed(path) : null;
+ if (allowed == null) {
+ allowed = this.wildcardRules != null ? this.wildcardRules.isAllowed(path) : null;
+ }
+ if (allowed == null) {
+ allowed = Boolean.TRUE;
+ }
+
+ return allowed.booleanValue();
+ }
+
+
+ /*
+ * Utility methods.
+ */
+ private static URI createURI(String s) throws NoRobotException {
+ try {
+ return new URI(s);
+ } catch (URISyntaxException ex) {
+ throw new NoRobotException("Invalid URI: " + ex.getInput());
+ }
+ }
+
+ private static URI resolveURI(URI base, String s) throws NoRobotException {
+ try {
+ return base.resolve(new URI(s));
+ } catch (URISyntaxException ex) {
+ throw new NoRobotException("Invalid URI: " + ex.getInput());
+ }
+ }
+
+ private static boolean equals(final Object obj1, final Object obj2) {
+ return obj1 == null ? obj2 == null : obj1.equals(obj2);
}
- return map;
- }
- /**
- * Decide if the parsed website will allow this URL to be
- * be seen.
- *
- * Note that parse(URL) must be called before this method
- * is called.
- *
- * @param url in question
- * @return is the url allowed?
- *
- * @throws IllegalStateException when parse has not been called
- */
- public boolean isUrlAllowed(URI uri) throws IllegalStateException, IllegalArgumentException {
- if (baseURI == null || robotsURI == null) {
- throw new IllegalStateException("You must call parse before you call this method. ");
- }
-
- if (!equals(baseURI.getHost(), uri.getHost()) ||
- baseURI.getPort() != uri.getPort() ||
- !equals(baseURI.getScheme(), uri.getScheme()))
- {
- throw new IllegalArgumentException(
- "Illegal to use a different url, " + uri.toString() +
- ", for this robots.txt: " + baseURI.toString());
- }
- if (uri.equals(robotsURI)) {
- return true;
- }
-
- String path = uri.getPath();
- String basepath = baseURI.getPath();
- if (path.startsWith(basepath)) {
- path = path.substring(basepath.length());
- if (!path.startsWith("/")) {
- path = "/" + path;
- }
- }
-
- try {
- path = URLDecoder.decode(path, US_ASCII);
- } catch (UnsupportedEncodingException ex) {
- // ASCII always supported
- return false;
- }
- Boolean allowed = this.rules != null ? this.rules.isAllowed( path ) : null;
- if(allowed == null) {
- allowed = this.wildcardRules != null ? this.wildcardRules.isAllowed( path ) : null;
- }
- if(allowed == null) {
- allowed = Boolean.TRUE;
- }
-
- return allowed.booleanValue();
- }
-
-
- /*
- * Utility methods.
- */
- private static URI createURI(String s) throws NoRobotException {
- try {
- return new URI(s);
- } catch (URISyntaxException ex) {
- throw new NoRobotException("Invalid URI: " + ex.getInput());
- }
- }
-
- private static URI resolveURI(URI base, String s) throws NoRobotException {
- try {
- return base.resolve(new URI(s));
- } catch (URISyntaxException ex) {
- throw new NoRobotException("Invalid URI: " + ex.getInput());
- }
- }
-
- private static boolean equals(final Object obj1, final Object obj2) {
- return obj1 == null ? obj2 == null : obj1.equals(obj2);
- }
-
}