You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2012/04/30 02:34:34 UTC

svn commit: r1332046 - in /incubator/droids/trunk: droids-core/src/main/java/org/apache/droids/api/ droids-core/src/main/java/org/apache/droids/impl/ droids-core/src/main/java/org/apache/droids/parse/html/ droids-core/src/main/java/org/apache/droids/ro...

Author: rfrovarp
Date: Mon Apr 30 02:34:34 2012
New Revision: 1332046

URL: http://svn.apache.org/viewvc?rev=1332046&view=rev
Log:
More cleanup.

Modified:
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/Droid.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/walker/SimpleWalkingDroid.java
    incubator/droids/trunk/droids-solr/src/main/java/org/apache/droids/solr/AdvancedSolrHandler.java

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/Droid.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/Droid.java?rev=1332046&r1=1332045&r2=1332046&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/Droid.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/api/Droid.java Mon Apr 30 02:34:34 2012
@@ -36,10 +36,10 @@ public interface Droid<T extends Task>
    * Initialize the queue. Can have different implementation but the main groups
    * normally are
    * <ol>
-   * <li>add only one url, from which we then start crawling
-   * <li>add an array of start urls and then crawl them
-   * <li>add an array of urls as fixed subset (no further crawling done)
-   * 
+   * <li>add only one url, from which we then start crawling</li>
+   * <li>add an array of start urls and then crawl them</li>
+   * <li>add an array of urls as fixed subset (no further crawling done)</li>
+   * </ol>
    * @throws DroidsException
    */
   void init() throws DroidsException;

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java?rev=1332046&r1=1332045&r2=1332046&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MaxDepthTaskValidator.java Mon Apr 30 02:34:34 2012
@@ -16,10 +16,11 @@
  */
 package org.apache.droids.impl;
 
-import org.apache.droids.api.*;
+import org.apache.droids.api.Task;
+import org.apache.droids.api.TaskValidator;
 
 /**
- * A simple 
+ * A simple task validator that honors task depth.
  */
 public class MaxDepthTaskValidator<T extends Task> implements TaskValidator<T> {
   private int maxDepth = -1;

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java?rev=1332046&r1=1332045&r2=1332046&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/impl/MultiThreadedTaskMaster.java Mon Apr 30 02:34:34 2012
@@ -40,8 +40,8 @@ import org.slf4j.LoggerFactory;
 public class MultiThreadedTaskMaster<T extends Task> implements TaskMaster<T>
 {
 
-  protected final Logger log = LoggerFactory.getLogger(AbstractDroid.class);
-  private final long TICKLE_TIME = 1000L;
+  protected static final Logger LOG = LoggerFactory.getLogger(AbstractDroid.class);
+  private static final long TICKLE_TIME = 1000L;
   
   /**
    * The execution state
@@ -88,8 +88,8 @@ public class MultiThreadedTaskMaster<T e
   @Override
   public void start(Queue<T> queue, Droid<T> droid)
   {
-    if (log.isInfoEnabled()) {
-      log.info("Start the executor service.");
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Start the executor service.");
     }
 
     state = ExecutionState.RUNNING;
@@ -104,7 +104,7 @@ public class MultiThreadedTaskMaster<T e
       try {
         Thread.sleep(TICKLE_TIME);
       } catch(InterruptedException ignored) {
-        log.error("", ignored);
+        LOG.error("", ignored);
       }
       pool.execute(new TaskExecutor(droid));
     }
@@ -116,8 +116,8 @@ public class MultiThreadedTaskMaster<T e
   public void stop()
   {
     // debug
-    if (log.isInfoEnabled()) {
-      log.info("Stop the executor service.");
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Stop the executor service.");
     }
 
     state = ExecutionState.STOPPED;
@@ -134,15 +134,15 @@ public class MultiThreadedTaskMaster<T e
 
         // Wait a while for to respond to being canceled
         if (!pool.awaitTermination(1, TimeUnit.SECONDS)) {
-          if (log.isInfoEnabled()) {
-            log.info("Scheduler did not stop.");
+          if (LOG.isInfoEnabled()) {
+            LOG.info("Scheduler did not stop.");
           }
         }
       }
     } catch (InterruptedException ex) {
 
-      if (log.isInfoEnabled()) {
-        log.info("Force scheduler to stop.");
+      if (LOG.isInfoEnabled()) {
+        LOG.info("Force scheduler to stop.");
       }
 
       // (Re-)Cancel if current thread also interrupted
@@ -153,8 +153,8 @@ public class MultiThreadedTaskMaster<T e
     }
 
     // debug
-    if (log.isInfoEnabled()) {
-      log.info("Scheduler stopped.");
+    if (LOG.isInfoEnabled()) {
+      LOG.info("Scheduler stopped.");
     }
 
   }
@@ -285,7 +285,7 @@ public class MultiThreadedTaskMaster<T e
         try {
           Thread.sleep(TICKLE_TIME);
         } catch (InterruptedException e) {
-          log.error("", e);
+          LOG.error("", e);
         }
       }
       
@@ -341,7 +341,7 @@ public class MultiThreadedTaskMaster<T e
         try {
           Thread.sleep(TICKLE_TIME);
         } catch (InterruptedException e) {
-          log.error("", e);
+          LOG.error("", e);
         }
         task = queue.poll();
       }
@@ -355,8 +355,8 @@ public class MultiThreadedTaskMaster<T e
           }
 
           // debug
-          if (log.isDebugEnabled()) {
-            log.debug("Worker [" + worker + "] execute task [" + task + "].");
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Worker [" + worker + "] execute task [" + task + "].");
           }
 
           // execute the task
@@ -365,8 +365,8 @@ public class MultiThreadedTaskMaster<T e
           }
 
           // debug
-          if (log.isDebugEnabled()) {
-            log.debug("Worker [" + worker + "] executed task [" + task + "] with success.");
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Worker [" + worker + "] executed task [" + task + "] with success.");
           }
 
           // monitor the execution of the task
@@ -380,13 +380,13 @@ public class MultiThreadedTaskMaster<T e
 
         } catch (Exception ex) {
           // debug
-          if (log.isDebugEnabled()) {
-            log.debug("Worker [" + worker + "] executed task [" + task + "] without success.");
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Worker [" + worker + "] executed task [" + task + "] without success.");
           }
 
           // debug
-          if (log.isErrorEnabled()) {
-            log.error("", ex);
+          if (LOG.isErrorEnabled()) {
+            LOG.error("", ex);
           }
 
           // monitor the exception

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java?rev=1332046&r1=1332045&r2=1332046&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/parse/html/LinkExtractor.java Mon Apr 30 02:34:34 2012
@@ -29,6 +29,8 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.droids.LinkTask;
 import org.apache.droids.api.Link;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
@@ -53,7 +55,10 @@ public class LinkExtractor extends Defau
    */
   private static final String BASE_ATTRIBUTE = "href";
 
-  protected final Log log = LogFactory.getLog(this.getClass());
+  /**
+   * Logger
+   */
+  private static final Logger LOG = LoggerFactory.getLogger(LinkExtractor.class);
 
   /**
    * Base url for host reference
@@ -114,11 +119,11 @@ public class LinkExtractor extends Defau
     if(checkBase && BASE_ELEMENT.equalsIgnoreCase(loc) && att.getValue(BASE_ATTRIBUTE) != null) {
       try {
         baseUri = new URI(att.getValue(BASE_ATTRIBUTE));
-        log.debug("Found base URI: " + baseUri);
+        LOG.debug("Found base URI: " + baseUri);
         checkBase = false;
       } 
       catch ( URISyntaxException e) {
-        log.error("Base URI not valid: " + att.getValue(BASE_ATTRIBUTE));
+        LOG.error("Base URI not valid: " + att.getValue(BASE_ATTRIBUTE));
       }
     }
     
@@ -129,7 +134,7 @@ public class LinkExtractor extends Defau
       linkAtt = elements.get(elem);
       if (elem.equalsIgnoreCase(loc) && att.getValue(linkAtt) != null) {
         link = getURI(att.getValue(linkAtt));
-        log.debug("Found element: " + elem + " with link: " + link);
+        LOG.debug("Found element: " + elem + " with link: " + link);
         if (link != null) {
         	addOutlinkURI(link.toString());
         	link = null;
@@ -161,7 +166,7 @@ public class LinkExtractor extends Defau
   public void endDocument() throws SAXException 
   {
     history = null;
-    log.debug("Found " + links.size() + " outliks");
+    LOG.debug("Found " + links.size() + " outliks");
   }
 
   /**
@@ -169,10 +174,10 @@ public class LinkExtractor extends Defau
    * @param anchorText Text to be added
    */
   private void addAnchorText(String anchorText) {
-    if(links.size() > 0) {
+    if (!links.isEmpty()) {
       LinkTask l = (LinkTask) links.get(links.size() - 1);
       l.setAnchorText(anchorText.replaceAll("\\s+", " ").trim());
-      log.debug("Adding anchor: " + l.getAnchorText() + " on link: " + l);
+      LOG.debug("Adding anchor: " + l.getAnchorText() + " on link: " + l);
     } 
   }
 
@@ -188,7 +193,7 @@ public class LinkExtractor extends Defau
       links = new ArrayList<Link>();
     if (history.add(link.toString())) {
       links.add(new LinkTask(base, link, base.getDepth() + 1));
-      log.debug("Added outlink: " + link + " with depth: " + base.getDepth() + 1);
+      LOG.debug("Added outlink: " + link + " with depth: " + base.getDepth() + 1);
     }
   }
 
@@ -217,7 +222,7 @@ public class LinkExtractor extends Defau
       }
     } 
     catch (Exception e) {
-      log.error("URI not valid: " + target);
+      LOG.error("URI not valid: " + target);
     }
     return null;
   }

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/walker/SimpleWalkingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/walker/SimpleWalkingDroid.java?rev=1332046&r1=1332045&r2=1332046&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/walker/SimpleWalkingDroid.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/robot/walker/SimpleWalkingDroid.java Mon Apr 30 02:34:34 2012
@@ -26,8 +26,8 @@ import java.util.LinkedList;
 import java.util.Queue;
 
 import org.apache.droids.AbstractDroid;
+import org.apache.droids.api.TaskMaster;
 import org.apache.droids.exception.InvalidTaskException;
-import org.apache.droids.api.*;
 import org.apache.droids.impl.MultiThreadedTaskMaster;
 
 public class SimpleWalkingDroid extends AbstractDroid<FileTask> implements WalkingDroid

Modified: incubator/droids/trunk/droids-solr/src/main/java/org/apache/droids/solr/AdvancedSolrHandler.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-solr/src/main/java/org/apache/droids/solr/AdvancedSolrHandler.java?rev=1332046&r1=1332045&r2=1332046&view=diff
==============================================================================
--- incubator/droids/trunk/droids-solr/src/main/java/org/apache/droids/solr/AdvancedSolrHandler.java (original)
+++ incubator/droids/trunk/droids-solr/src/main/java/org/apache/droids/solr/AdvancedSolrHandler.java Mon Apr 30 02:34:34 2012
@@ -21,6 +21,7 @@ import java.net.URI;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
 import java.util.Map.Entry;
@@ -68,7 +69,7 @@ public class AdvancedSolrHandler impleme
 	 * The HashMap's key matches the Solr field.
 	 * The HashMap's value is an absolute path corresponding to an element.
 	 */
-	private HashMap<String, String> selectors;
+	private Map<String, String> selectors;
 	
 	/**
 	 * A content handler
@@ -97,7 +98,7 @@ public class AdvancedSolrHandler impleme
 	/**
 	 * @return the current path selectors
 	 */
-	public HashMap<String, String> getSelectors() {
+	public Map<String, String> getSelectors() {
 		return selectors;
 	}
 
@@ -141,7 +142,7 @@ public class AdvancedSolrHandler impleme
 		
 		if (parser == null) initParser();
 		
-		if (selectors.size() > 0) {
+		if (!selectors.isEmpty()) {
 			contentHandler.initDocument(doc);
 			try {
 				parser.setContentHandler(contentHandler);
@@ -187,36 +188,36 @@ public class AdvancedSolrHandler impleme
 		/**
 		 * the patterns which match element's path
 		 */
-		private HashMap<String, Pattern> patterns = new HashMap<String, Pattern>();
+		private Map<String, Pattern> patterns = new HashMap<String, Pattern>();
 
 		/**
 		 * stores the values which match the patterns
 		 */
-		private HashMap<String, String> valueRecorders = new HashMap<String, String>();
+		private Map<String, String> valueRecorders = new HashMap<String, String>();
 
 		/**
 		 * A two dimensional stack used to store the current path
 		 */
 		private Stack<Stack<String>> path = new Stack<Stack<String>>();
 
-		private Integer level = 0;
+		private int level = 0;
 
-		private Integer lastLevel = 0;
+		private int lastLevel = 0;
 		
 		/**
 		 * Constructor
 		 * 
-		 * @param selectors an HashMap which contains selectors
+		 * @param selectors a Map which contains selectors
 		 */
-		public SolrContentHandler(HashMap<String, String> selectors) {
+		public SolrContentHandler(Map<String, String> selectors) {
 			initPatterns(selectors);
 		}
 		
 		/**
+		 * Initialize patterns
 		 * @param selectors
-		 * @return
 		 */
-		public void initPatterns(HashMap<String, String> selectors) {
+		public void initPatterns(Map<String, String> selectors) {
 			if (selectors != null) {
 				
 				// clear the current patterns