You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/13 15:46:22 UTC

svn commit: r713743 - in /incubator/droids/trunk: droids-core/src/main/java/org/apache/droids/impl/ droids-core/src/main/java/org/apache/droids/protocol/http/ droids-core/src/main/java/org/apache/droids/robot/crawler/ droids-core/src/test/java/org/apac...

Author: olegk
Date: Thu Nov 13 07:46:22 2008
New Revision: 713743

URL: http://svn.apache.org/viewvc?rev=713743&view=rev
Log:
Fixed broken Droids Spring; reduced logging noise

Modified:
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
    incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java
    incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java
    incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml
    incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml
    incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java Thu Nov 13 07:46:22 2008
@@ -61,8 +61,12 @@
     this.startedWorking = new Date();
     this.finishedWorking = null;
     
+    int n = getMaxThreads();
+    if (log.isInfoEnabled()) {
+      log.info("Number of concurrent workers: " + n);
+    }
     // start the pool
-    this.pool = new ThreadPoolExecutor(getMaxThreads(), getMaxThreads(), KEEP_ALIVE,
+    this.pool = new ThreadPoolExecutor(n, n, KEEP_ALIVE,
         TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>() );
     this.runningWorker = new ConcurrentHashMap<Long, WorkerRunner>();
     
@@ -81,8 +85,8 @@
         //int y = worker.getDepth() + 1;
         pool.remove(worker);
         runningWorker.remove(id);
-        if (log.isInfoEnabled()) {
-          log.info("Worker \"" + id + "\" has finished.");
+        if (log.isDebugEnabled()) {
+          log.debug("Worker '" + id + "' has finished.");
         }
 
         boolean terminate = false;
@@ -95,10 +99,10 @@
           }
           switch (result) {
           case WARN:
-            log.warn(ex.getMessage());
+            log.warn(ex.toString());
             break;
           case FATAL:
-            log.warn(ex.getMessage());
+            log.warn(ex.getMessage(), ex);
             terminate = true;
             break;
           }
@@ -109,7 +113,7 @@
           
           long elapsed = System.currentTimeMillis() - startedWorking.getTime();
           if (log.isInfoEnabled()) {
-            log.info("All threads have finished. (elapsed:" + elapsed + ")" );
+            log.info("All threads have finished. (elapsed: " + elapsed + ")" );
           }
           finishedWorking = new Date();
           droid.finished();
@@ -141,10 +145,10 @@
       }
       WorkerRunner runner = new WorkerRunner();
       runningWorker.put(runner.getId(), runner);
-      pool.execute(runner);
-      if (log.isInfoEnabled()) {
-        log.info("starting " + runner.getId());
+      if (log.isDebugEnabled()) {
+        log.debug("Starting worker '" + runner.getId() + "'");
       }
+      pool.execute(runner);
     }
   }
 

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/DroidsHttpClient.java Thu Nov 13 07:46:22 2008
@@ -51,6 +51,7 @@
 import org.apache.http.impl.conn.ProxySelectorRoutePlanner;
 import org.apache.http.impl.conn.tsccm.ThreadSafeClientConnManager;
 import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpConnectionParams;
 import org.apache.http.params.HttpParams;
 import org.apache.http.params.HttpProtocolParams;
 import org.apache.http.protocol.BasicHttpContext;
@@ -78,6 +79,11 @@
     super(null, null);
   }
 
+  public DroidsHttpClient(HttpParams params)
+  {
+    super(null, params);
+  }
+
   @Override
   protected HttpParams createHttpParams()
   {
@@ -85,7 +91,7 @@
     HttpProtocolParams.setVersion(params, HttpVersion.HTTP_1_1);
     HttpProtocolParams.setContentCharset(params, HTTP.DEFAULT_CONTENT_CHARSET);
     HttpProtocolParams.setUseExpectContinue(params, true);
-    HttpProtocolParams.setUserAgent(params, "Apache-Droids/1.1");
+    HttpConnectionParams.setStaleCheckingEnabled(params, false);
     return params;
   }
 

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/protocol/http/HttpProtocol.java Thu Nov 13 07:46:22 2008
@@ -34,7 +34,6 @@
 import org.apache.http.client.HttpClient;
 import org.apache.http.client.HttpResponseException;
 import org.apache.http.client.methods.HttpGet;
-import org.apache.http.impl.client.DefaultHttpClient;
 import org.apache.http.params.CoreProtocolPNames;
 
 /**
@@ -56,7 +55,7 @@
   }
   
   public HttpProtocol() {
-    this(new DefaultHttpClient());
+    this(new DroidsHttpClient());
   }
   
   public ContentEntity load(URI uri) throws IOException {

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/crawler/CrawlingWorker.java Thu Nov 13 07:46:22 2008
@@ -44,26 +44,29 @@
   public void execute(Link link) throws DroidsException, IOException
   {
     final String userAgent = this.getClass().getCanonicalName();
-    if (log.isInfoEnabled()) {
-      log.info("Starting " + userAgent);
+    if (log.isDebugEnabled()) {
+      log.debug("Starting " + userAgent);
     }
     URI uri = link.getURI();
     final Protocol protocol = droid.getProtocolFactory().getProtocol(uri);
     if (protocol.isAllowed(uri)) {
+      if (log.isInfoEnabled()) {
+        log.info("Loading " + uri);
+      }
       ContentEntity entity = protocol.load(uri);
       try {
         String contentType = entity.getMimeType();
-        if (log.isInfoEnabled()) {
-          log.info("contentType " + contentType);
+        if (log.isDebugEnabled()) {
+          log.debug("Content type " + contentType);
         }
         if (contentType == null){
-          log.info("missing contentType... can't parse...");
+          log.info("Missing content type... can't parse...");
         }
         else {
           Parser parser = droid.getParserFactory().getParser(contentType);
           if( parser == null ) {
-            if (log.isInfoEnabled()) {
-              log.info("could not find parser for: " + contentType);
+            if (log.isDebugEnabled()) {
+              log.debug("Could not find parser for " + contentType);
             }
           }
           else {

Modified: incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java (original)
+++ incubator/droids/trunk/droids-core/src/test/java/org/apache/droids/examples/SimpleRuntime.java Thu Nov 13 07:46:22 2008
@@ -19,6 +19,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.droids.robot.crawler.CrawlingDroid;
 import org.apache.droids.api.Link;
@@ -34,7 +35,16 @@
 import org.apache.droids.impl.SimpleTaskQueue;
 import org.apache.droids.net.RegexURLFilter;
 import org.apache.droids.parse.html.HtmlParser;
+import org.apache.droids.protocol.http.DroidsHttpClient;
 import org.apache.droids.protocol.http.HttpProtocol;
+import org.apache.http.HttpVersion;
+import org.apache.http.conn.params.ConnManagerParamBean;
+import org.apache.http.conn.params.ConnPerRouteBean;
+import org.apache.http.params.BasicHttpParams;
+import org.apache.http.params.HttpConnectionParamBean;
+import org.apache.http.params.HttpParams;
+import org.apache.http.params.HttpProtocolParamBean;
+import org.apache.http.protocol.HTTP;
 
 /**
  * Simple Droids runtime that wires various components together in Java code 
@@ -65,11 +75,32 @@
     parserFactory.setMap(new HashMap<String, Object>());
     parserFactory.getMap().put("text/html", htmlParser);
 
-    // Create protocol factory. Support HTTP only.
+    // Create protocol factory. Support HTTP/S only.
     ProtocolFactory protocolFactory = new ProtocolFactory();
-    HttpProtocol httpProtocol = new HttpProtocol();
+    
+    // Create and configure HTTP client 
+    HttpParams params = new BasicHttpParams(); 
+    HttpProtocolParamBean hppb = new HttpProtocolParamBean(params); 
+    HttpConnectionParamBean hcpb = new HttpConnectionParamBean(params); 
+    ConnManagerParamBean cmpb = new ConnManagerParamBean(params); 
+    
+    // Set protocol parameters
+    hppb.setVersion(HttpVersion.HTTP_1_1);
+    hppb.setContentCharset(HTTP.ISO_8859_1);
+    hppb.setUseExpectContinue(true);
+    // Set connection parameters
+    hcpb.setStaleCheckingEnabled(false);
+    // Set connection manager parameters
+    ConnPerRouteBean connPerRouteBean = new ConnPerRouteBean();
+    connPerRouteBean.setDefaultMaxPerRoute(2);
+    cmpb.setConnectionsPerRoute(connPerRouteBean);
+    
+    DroidsHttpClient httpclient = new DroidsHttpClient(params);
+    
+    HttpProtocol httpProtocol = new HttpProtocol(httpclient);
     protocolFactory.setMap(new HashMap<String, Object>());
     protocolFactory.getMap().put("http", httpProtocol);
+    protocolFactory.getMap().put("https", httpProtocol);
     
     // Create URL filter factory.
     URLFiltersFactory filtersFactory = new URLFiltersFactory();
@@ -111,8 +142,14 @@
     initialLocations.add( targetURL );
     helloCrawler.setInitialLocations(initialLocations);
     
+    // Initialize and start the crawler
     helloCrawler.init();
     helloCrawler.start();
+    
+    // Await termination
+    helloCrawler.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
+    // Shut down the HTTP connection manager
+    httpclient.getConnectionManager().shutdown();
   }
 
 }

Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/Cli.java Thu Nov 13 07:46:22 2008
@@ -16,9 +16,15 @@
  */
 package org.apache.droids.dynamic;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.droids.exception.DroidsException;
+import org.apache.droids.api.Droid;
+import org.apache.droids.robot.crawler.CrawlingDroid;
+import org.springframework.context.ApplicationContext;
 import org.springframework.context.support.ClassPathXmlApplicationContext;
 
 /**
@@ -33,40 +39,43 @@
   }
   private static final Log log = LogFactory.getLog(Cli.class);
 
-  private static ClassPathXmlApplicationContext context=null;
-
   /**
    * Invoke the processing with droids.
    * @param args You need to provide the droid name (e.g. crawler-x-m02y07) and optional the config file.
-   * @throws DroidsException
+   * @throws Exception
    */
-  public static void main(String[] args) throws DroidsException {
-    String config=null;
-    if (args.length < 1 | args.length>2) {
-      final String message = "To invoke a droid:\n"
-          + "You need to provide the droid name (e.g. crawler-x-m02y07) and optional" +
-                        "the config file.";
-      log.error(message);
-      throw new DroidsException(message);
-    }else if (args.length==2){
-        config = args[1]; 
+  public static void main(String[] args) throws Exception {
+    if (args.length < 1) {
+      log.error("To invoke a droid:\n"
+          + "You need to provide a droid name (e.g. crawler-x-m02y07)" +
+              " and optionally an initial location to crawl");
+      return;
     }
-    final String name = args[0];
-    if (null == config) {
-      setContext(new ClassPathXmlApplicationContext(
-          new String[] { "classpath:/org/apache/droids/droids-core-context.xml" }));
-    } else {
-      setContext(new ClassPathXmlApplicationContext(new String[] { config }));
+    String name = args[0];
+    String location = null;
+    if (args.length > 1) {
+      location = args[1];
     }
-    DroidsConfig core = (DroidsConfig) context.getBean("org.apache.droids.Core");
+    ApplicationContext context = new ClassPathXmlApplicationContext( 
+        "classpath:/org/apache/droids/dynamic/droids-core-context.xml");
+    
+    DroidsConfig config = (DroidsConfig) context.getBean("org.apache.droids.dynamic.DroidsConfig");
+    Droid droid = config.getDroid(name);
+    
+    if (droid == null) {
+      log.error("Droid " + name + " is not defined");
+      return;
+    }
+    
     log.info("A p a c h e    D r o i d s - an intelligent robot framework");
-    core.start(name);
-  }
-
-  private static void setContext(
-      ClassPathXmlApplicationContext classPathXmlApplicationContext) {
-    context = classPathXmlApplicationContext;
-
+    if (droid instanceof CrawlingDroid) {
+      List<String> locations = new ArrayList<String>();
+      locations.add(location);
+      ((CrawlingDroid) droid).setInitialLocations(locations);
+    }
+    droid.init();
+    droid.start();
+    droid.getTaskMaster().awaitTermination(0, TimeUnit.MILLISECONDS);
   }
 
 }

Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-context.xml Thu Nov 13 07:46:22 2008
@@ -39,16 +39,20 @@
     <property name="locations" value="classpath:org/apache/droids/dynamic/droids-core.properties"/>
   </bean>
   
+  <bean name="taskExceptionHandler"
+    class="org.apache.droids.impl.DefaultTaskExceptionHandler">
+  </bean>
   
   <bean name="taskMaster"
     class="org.apache.droids.impl.MultiThreadedTaskMaster">
+    <property name="exceptionHandler" ref="taskExceptionHandler" />
     <property name="delayTimer" ref="org.apache.droids.delay.SimpleDelayTimer"/>
     <property name="maxThreads" value="${droids.maxThreads}"/>
   </bean>
   
   <!-- Droids -->
   <bean name="org.apache.droids.api.Droid/hello"
-    class="org.apache.droids.crawler.CrawlingDroid">
+    class="org.apache.droids.robot.crawler.CrawlingDroid">
     <constructor-arg ref="taskMaster" />
     <constructor-arg ref="org.apache.droids.impl.SimpleTaskQueue" />
     
@@ -56,34 +60,21 @@
     <property name="parserFactory" ref="org.apache.droids.helper.factories.ParserFactory"/>
     <property name="handlerFactory" ref="org.apache.droids.helper.factories.HandlerFactory"/>
     <property name="filtersFactory" ref="org.apache.droids.helper.factories.URLFiltersFactory"/>
-    
-    <property name="initialLocations">
-      <set>
-        <value>${droids.initial.url}</value>
-      </set>
-    </property>
-    
   </bean>
   <!-- Queue -->
   <bean id="org.apache.droids.impl.SimpleTaskQueue"
     class="org.apache.droids.impl.SimpleTaskQueue">
-    <!-- 
-    <property name="maxDepth" value="${droids.queue.maxDepth}"/>
     <property name="maxSize" value="${droids.queue.maxSize}"/>
-     -->
   </bean>
   <!-- Protocol -->
   <bean 
     name="org.apache.droids.api.Protocol/http"
-    class="org.apache.droids.protocol.http.Http" scope="prototype">
-    <property name="from" value="labs@labs.apache.org"/>
-    <property name="refer" value="http://svn.apache.org/repos/asf/labs/droids"/>
+    class="org.apache.droids.protocol.http.HttpProtocol" scope="singleton">
     <property name="userAgent" value="DROIDS-crawler-x-m01y08"/>
-    <property name="timeout" value="10000"/>
     <property name="forceAllow" value="${droids.protocol.http.force}"/>
   </bean>
   <bean name="org.apache.droids.api.Protocol/file"
-    class="org.apache.droids.protocol.file.FileProtocol" scope="prototype"/>
+    class="org.apache.droids.protocol.file.FileProtocol" scope="singleton"/>
   <!-- Parser -->
   <bean 
     name="text/html"
@@ -117,6 +108,6 @@
   <bean 
     name="org.apache.droids.delay.SimpleDelayTimer"
     class="org.apache.droids.delay.SimpleDelayTimer">
-    <property name="delay" value="${droids.delay.request}"/>
+    <property name="delayMillis" value="${droids.delay.request}"/>
   </bean>
 </beans>

Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core-factories-context.xml Thu Nov 13 07:46:22 2008
@@ -22,7 +22,7 @@
        http://cocoon.apache.org/schema/configurator http://cocoon.apache.org/schema/configurator/cocoon-configurator-1.1.0.xsd">
   
   <!-- Core -  factories register -->
-  <bean id="org.apache.droids.Core" class="org.apache.droids.Core">
+  <bean id="org.apache.droids.dynamic.DroidsConfig" class="org.apache.droids.dynamic.DroidsConfig">
     <property name="droids"
       ref="org.apache.droids.helper.factories.DroidFactory"/>
     <property name="protocolFactory"

Modified: incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties?rev=713743&r1=713742&r2=713743&view=diff
==============================================================================
--- incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties (original)
+++ incubator/droids/trunk/droids-spring/src/main/java/org/apache/droids/dynamic/droids-core.properties Thu Nov 13 07:46:22 2008
@@ -1,9 +1,7 @@
-droids.maxThreads=@droids.maxThreads@
-droids.initial.url=@droids.initial.url@
-droids.queue.maxDepth=@droids.queue.maxDepth@
-droids.queue.maxSize=@droids.queue.maxSize@
-droids.protocol.http.force=@droids.protocol.http.force@
-droids.filter.regex=@droids.filter.regex@
-droids.handler.save.dir=@droids.handler.save.dir@
-droids.handler.save.includeHost=@droids.handler.save.includeHost@
-droids.delay.request=@droids.delay.request@
\ No newline at end of file
+droids.maxThreads=5
+droids.queue.maxSize=-1
+droids.protocol.http.force=true
+droids.filter.regex=classpath:/regex-urlfilter.txt
+droids.handler.save.dir=tmp/
+droids.handler.save.includeHost=true
+droids.delay.request=100
\ No newline at end of file