You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/13 15:46:22 UTC
svn commit: r713743 - in /incubator/droids/trunk:
droids-core/src/main/java/org/apache/droids/impl/
droids-core/src/main/java/org/apache/droids/protocol/http/
droids-core/src/main/java/org/apache/droids/robot/crawler/
droids-core/src/test/java/org/apache/droids/examples/
droids-spring/src/main/java/org/apache/droids/dynamic/
Author: olegk
Date: Thu Nov 13 07:46:22 2008
New Revision: 713743
URL: http://svn.apache.org/viewvc?rev=713743&view=rev
Log:
Fixed broken Droids Spring; reduced logging noise
Modified:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java
incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java
incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml
incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml
incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties
Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java Thu Nov 13 07:46:22 2008
@@ -61,8 +61,12 @@
this.startedWorking = new Date();
this.finishedWorking = null;
+ int n = getMaxThreads();
+ if (log.isInfoEnabled()) {
+ log.info("Number of concurrent workers: " + n);
+ }
// start the pool
- this.pool = new ThreadPoolExecutor(getMaxThreads(), getMaxThreads(), KEEP_ALIVE,
+ this.pool = new ThreadPoolExecutor(n, n, KEEP_ALIVE,
TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>() );
this.runningWorker = new ConcurrentHashMap<Long, WorkerRunner>();
@@ -81,8 +85,8 @@
//int y = worker.getDepth() + 1;
pool.remove(worker);
runningWorker.remove(id);
- if (log.isInfoEnabled()) {
- log.info("Worker \"" + id + "\" has finished.");
+ if (log.isDebugEnabled()) {
+ log.debug("Worker '" + id + "' has finished.");
}
boolean terminate = false;
@@ -95,10 +99,10 @@
}
switch (result) {
case WARN:
- log.warn(ex.getMessage());
+ log.warn(ex.toString());
break;
case FATAL:
- log.warn(ex.getMessage());
+ log.warn(ex.getMessage(), ex);
terminate = true;
break;
}
@@ -109,7 +113,7 @@
long elapsed = System.currentTimeMillis() - startedWorking.getTime();
if (log.isInfoEnabled()) {
- log.info("All threads have finished. (elapsed:" + elapsed + ")" );
+ log.info("All threads have finished. (elapsed: " + elapsed + ")" );
}
finishedWorking = new Date();
droid.finished();
@@ -141,10 +145,10 @@
}
WorkerRunner runner = new WorkerRunner();
runningWorker.put(runner.getId(), runner);
- pool.execute(runner);
- if (log.isInfoEnabled()) {
- log.info("starting " + runner.getId());
+ if (log.isDebugEnabled()) {
+ log.debug("Starting worker '" + runner.getId() + "'");
}
+ pool.execute(runner);
}
}
Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java Thu Nov 13 07:46:22 2008
@@ -51,6 +51,7 @@
import org.apache.http.impl.conn.ProxySelectorRoutePlanner;
import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpConnectionParams;
import org.apache.http.params.HttpParams;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.BasicHttpContext;
@@ -78,6 +79,11 @@
super(null, null);
}
+ public DroidsHttpClient(HttpParams params)
+ {
+ super(null, params);
+ }
+
@Override
protected HttpParams createHttpParams()
{
@@ -85,7 +91,7 @@
HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
HttpProtocolParams.setContentCharset(params, HTTP.DEFAULT_CONTENT_CHARSET);
HttpProtocolParams.setUseExpectContinue(params, true);
- HttpProtocolParams.setUserAgent(params, "Apache-Droids/1.1");
+ HttpConnectionParams.setStaleCheckingEnabled(params, false);
return params;
}
Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java Thu Nov 13 07:46:22 2008
@@ -34,7 +34,6 @@
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.CoreProtocolPNames;
/**
@@ -56,7 +55,7 @@
}
public HttpProtocol() {
- this(new DefaultHttpClient());
+ this(new DroidsHttpClient());
}
public ContentEntity load(URI uri) throws IOException {
Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java Thu Nov 13 07:46:22 2008
@@ -44,26 +44,29 @@
public void execute(Link link) throws DroidsException, IOException
{
final String userAgent = this.getClass().getCanonicalName();
- if (log.isInfoEnabled()) {
- log.info("Starting " + userAgent);
+ if (log.isDebugEnabled()) {
+ log.debug("Starting " + userAgent);
}
URI uri = link.getURI();
final Protocol protocol = droid.getProtocolFactory().getProtocol(uri);
if (protocol.isAllowed(uri)) {
+ if (log.isInfoEnabled()) {
+ log.info("Loading " + uri);
+ }
ContentEntity entity = protocol.load(uri);
try {
String contentType = entity.getMimeType();
- if (log.isInfoEnabled()) {
- log.info("contentType " + contentType);
+ if (log.isDebugEnabled()) {
+ log.debug("Content type " + contentType);
}
if (contentType == null){
- log.info("missing contentType... can't parse...");
+ log.info("Missing content type... can't parse...");
}
else {
Parser parser = droid.getParserFactory().getParser(contentType);
if( parser == null ) {
- if (log.isInfoEnabled()) {
- log.info("could not find parser for: " + contentType);
+ if (log.isDebugEnabled()) {
+ log.debug("Could not find parser for " + contentType);
}
}
else {
Modified: incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java (original)
+++ incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java Thu Nov 13 07:46:22 2008
@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
+import java.util.concurrent.TimeUnit;
import org.apache.droids.robot.crawler.CrawlingDroid;
import org.apache.droids.api.Link;
@@ -34,7 +35,16 @@
import org.apache.droids.impl.SimpleTaskQueue;
import org.apache.droids.net.RegexURLFilter;
import org.apache.droids.parse.html.HtmlParser;
+import org.apache.droids.protocol.http.DroidsHttpClient;
import org.apache.droids.protocol.http.HttpProtocol;
+import org.apache.http.HttpVersion;
+import org.apache.http.conn.params.ConnManagerParamBean;
+import org.apache.http.conn.params.ConnPerRouteBean;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpConnectionParamBean;
+import org.apache.http.params.HttpParams;
+import org.apache.http.params.HttpProtocolParamBean;
+import org.apache.http.protocol.HTTP;
/**
* Simple Droids runtime that wires various components together in Java code
@@ -65,11 +75,32 @@
parserFactory.setMap(new HashMap<String, Object>());
parserFactory.getMap().put("text/html", htmlParser);
- // Create protocol factory. Support HTTP only.
+ // Create protocol factory. Support HTTP/S only.
ProtocolFactory protocolFactory = new ProtocolFactory();
- HttpProtocol httpProtocol = new HttpProtocol();
+
+ // Create and configure HTTP client
+ HttpParams params = new BasicHttpParams();
+ HttpProtocolParamBean hppb = new HttpProtocolParamBean(params);
+ HttpConnectionParamBean hcpb = new HttpConnectionParamBean(params);
+ ConnManagerParamBean cmpb = new ConnManagerParamBean(params);
+
+ // Set protocol parameters
+ hppb.setVersion(HttpVersion.HTTP_1_1);
+ hppb.setContentCharset(HTTP.ISO_8859_1);
+ hppb.setUseExpectContinue(true);
+ // Set connection parameters
+ hcpb.setStaleCheckingEnabled(false);
+ // Set connection manager parameters
+ ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
+ connPerRouteBean.setDefaultMaxPerRoute(2);
+ cmpb.setConnectionsPerRoute(connPerRouteBean);
+
+ DroidsHttpClient httpclient = new DroidsHttpClient(params);
+
+ HttpProtocol httpProtocol = new HttpProtocol(httpclient);
protocolFactory.setMap(new HashMap<String, Object>());
protocolFactory.getMap().put("http", httpProtocol);
+ protocolFactory.getMap().put("https", httpProtocol);
// Create URL filter factory.
URLFiltersFactory filtersFactory = new URLFiltersFactory();
@@ -111,8 +142,14 @@
initialLocations.add( targetURL );
helloCrawler.setInitialLocations(initialLocations);
+ // Initialize and start the crawler
helloCrawler.init();
helloCrawler.start();
+
+ // Await termination
+ helloCrawler.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
+ // Shut down the HTTP connection manager
+ httpclient.getConnectionManager().shutdown();
}
}
Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java Thu Nov 13 07:46:22 2008
@@ -16,9 +16,15 @@
*/
package org.apache.droids.dynamic;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.droids.exception.DroidsException;
+import org.apache.droids.api.Droid;
+import org.apache.droids.robot.crawler.CrawlingDroid;
+import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
/**
@@ -33,40 +39,43 @@
}
private static final Log log = LogFactory.getLog(Cli.class);
- private static ClassPathXmlApplicationContext context=null;
-
/**
* Invoke the processing with droids.
* @param args You need to provide the droid name (e.g. crawler-x-m02y07) and optional the config file.
- * @throws DroidsException
+ * @throws Exception
*/
- public static void main(String[] args) throws DroidsException {
- String config=null;
- if (args.length < 1 | args.length>2) {
- final String message = "To invoke a droid:\n"
- + "You need to provide the droid name (e.g. crawler-x-m02y07) and optional" +
- "the config file.";
- log.error(message);
- throw new DroidsException(message);
- }else if (args.length==2){
- config = args[1];
+ public static void main(String[] args) throws Exception {
+ if (args.length < 1) {
+ log.error("To invoke a droid:\n"
+ + "You need to provide a droid name (e.g. crawler-x-m02y07)" +
+ " and optionally an initial location to crawl");
+ return;
}
- final String name = args[0];
- if (null == config) {
- setContext(new ClassPathXmlApplicationContext(
- new String[] { "classpath:/org/apache/droids/droids-core-context.xml" }));
- } else {
- setContext(new ClassPathXmlApplicationContext(new String[] { config }));
+ String name = args[0];
+ String location = null;
+ if (args.length > 1) {
+ location = args[1];
}
- DroidsConfig core = (DroidsConfig) context.getBean("org.apache.droids.Core");
+ ApplicationContext context = new ClassPathXmlApplicationContext(
+ "classpath:/org/apache/droids/dynamic/droids-core-context.xml");
+
+ DroidsConfig config = (DroidsConfig) context.getBean("org.apache.droids.dynamic.DroidsConfig");
+ Droid droid = config.getDroid(name);
+
+ if (droid == null) {
+ log.error("Droid " + name + " is not defined");
+ return;
+ }
+
log.info("A p a c h e D r o i d s - an intelligent robot framework");
- core.start(name);
- }
-
- private static void setContext(
- ClassPathXmlApplicationContext classPathXmlApplicationContext) {
- context = classPathXmlApplicationContext;
-
+ if (droid instanceof CrawlingDroid) {
+ List<String> locations = new ArrayList<String>();
+ locations.add(location);
+ ((CrawlingDroid) droid).setInitialLocations(locations);
+ }
+ droid.init();
+ droid.start();
+ droid.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
}
}
Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml Thu Nov 13 07:46:22 2008
@@ -39,16 +39,20 @@
<property name="locations" value="classpath:org/apache/droids/dynamic/droids-core.properties"/>
</bean>
+ <bean name="taskExceptionHandler"
+ class="org.apache.droids.impl.DefaultTaskExceptionHandler">
+ </bean>
<bean name="taskMaster"
class="org.apache.droids.impl.MultiThreadedTaskMaster">
+ <property name="exceptionHandler" ref="taskExceptionHandler" />
<property name="delayTimer" ref="org.apache.droids.delay.SimpleDelayTimer"/>
<property name="maxThreads" value="${droids.maxThreads}"/>
</bean>
<!-- Droids -->
<bean name="org.apache.droids.api.Droid/hello"
- class="org.apache.droids.crawler.CrawlingDroid">
+ class="org.apache.droids.robot.crawler.CrawlingDroid">
<constructor-arg ref="taskMaster" />
<constructor-arg ref="org.apache.droids.impl.SimpleTaskQueue" />
@@ -56,34 +60,21 @@
<property name="parserFactory" ref="org.apache.droids.helper.factories.ParserFactory"/>
<property name="handlerFactory" ref="org.apache.droids.helper.factories.HandlerFactory"/>
<property name="filtersFactory" ref="org.apache.droids.helper.factories.URLFiltersFactory"/>
-
- <property name="initialLocations">
- <set>
- <value>${droids.initial.url}</value>
- </set>
- </property>
-
</bean>
<!-- Queue -->
<bean id="org.apache.droids.impl.SimpleTaskQueue"
class="org.apache.droids.impl.SimpleTaskQueue">
- <!--
- <property name="maxDepth" value="${droids.queue.maxDepth}"/>
<property name="maxSize" value="${droids.queue.maxSize}"/>
- -->
</bean>
<!-- Protocol -->
<bean
name="org.apache.droids.api.Protocol/http"
- class="org.apache.droids.protocol.http.Http" scope="prototype">
- <property name="from" value="labs@labs.apache.org"/>
- <property name="refer" value="http://svn.apache.org/repos/asf/labs/droids"/>
+ class="org.apache.droids.protocol.http.HttpProtocol" scope="singleton">
<property name="userAgent" value="DROIDS-crawler-x-m01y08"/>
- <property name="timeout" value="10000"/>
<property name="forceAllow" value="${droids.protocol.http.force}"/>
</bean>
<bean name="org.apache.droids.api.Protocol/file"
- class="org.apache.droids.protocol.file.FileProtocol" scope="prototype"/>
+ class="org.apache.droids.protocol.file.FileProtocol" scope="singleton"/>
<!-- Parser -->
<bean
name="text/html"
@@ -117,6 +108,6 @@
<bean
name="org.apache.droids.delay.SimpleDelayTimer"
class="org.apache.droids.delay.SimpleDelayTimer">
- <property name="delay" value="${droids.delay.request}"/>
+ <property name="delayMillis" value="${droids.delay.request}"/>
</bean>
</beans>
Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml Thu Nov 13 07:46:22 2008
@@ -22,7 +22,7 @@
http://cocoon.apache.org/schema/configurator http://cocoon.apache.org/schema/configurator/cocoon-configurator-1.1.0.xsd">
<!-- Core - factories register -->
- <bean id="org.apache.droids.Core" class="org.apache.droids.Core">
+ <bean id="org.apache.droids.dynamic.DroidsConfig" class="org.apache.droids.dynamic.DroidsConfig">
<property name="droids"
ref="org.apache.droids.helper.factories.DroidFactory"/>
<property name="protocolFactory"
Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties Thu Nov 13 07:46:22 2008
@@ -1,9 +1,7 @@
-droids.maxThreads=@droids.maxThreads@
-droids.initial.url=@droids.initial.url@
-droids.queue.maxDepth=@droids.queue.maxDepth@
-droids.queue.maxSize=@droids.queue.maxSize@
-droids.protocol.http.force=@droids.protocol.http.force@
-droids.filter.regex=@droids.filter.regex@
-droids.handler.save.dir=@droids.handler.save.dir@
-droids.handler.save.includeHost=@droids.handler.save.includeHost@
-droids.delay.request=@droids.delay.request@
\ No newline at end of file
+droids.maxThreads=5
+droids.queue.maxSize=-1
+droids.protocol.http.force=true
+droids.filter.regex=classpath:/regex-urlfilter.txt
+droids.handler.save.dir=tmp/
+droids.handler.save.includeHost=true
+droids.delay.request=100
\ No newline at end of file