You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by to...@apache.org on 2013/04/03 08:07:28 UTC

svn commit: r1463870 - in /incubator/droids/branches/0.2.x-cleanup: droids-core/src/main/java/org/apache/droids/core/ droids-crawler/src/main/java/org/apache/droids/crawler/ droids-crawler/src/main/java/org/apache/droids/fetcher/ droids-nekohtml/src/ma...

Author: tobr
Date: Wed Apr  3 08:07:28 2013
New Revision: 1463870

URL: http://svn.apache.org/r1463870
Log:
Cleaned up the code.
Added getQueue() to the Droid interface.
Changed members to private and created getters/setters.

Removed:
    incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/protocol/
Modified:
    incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
    incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
    incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
    incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java
    incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java
    incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java
    incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java

Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java Wed Apr  3 08:07:28 2013
@@ -33,14 +33,14 @@ import org.slf4j.LoggerFactory;
  * @version 1.0
  */
 public abstract class AbstractDroid<T extends Task> implements Droid<T> {
-    protected final Queue<T> queue;
-    protected final TaskMaster<T> taskMaster;
-    protected Fetcher<T> fetcher;
-    protected ParserFactory parserFactory;
-    protected FilterFactory filterFactory;
-    protected HandlerFactory handlerFactory;
+    private final Queue<T> queue;
+    private final TaskMaster<T> taskMaster;
+    private final ParserFactory parserFactory;
+    private final FilterFactory filterFactory;
+    private final HandlerFactory handlerFactory;
+    private Fetcher<T> fetcher;
 
-    protected final static Logger logger = LoggerFactory.getLogger(AbstractDroid.class);
+    private final static Logger logger = LoggerFactory.getLogger(AbstractDroid.class);
 
     /**
      * Constructor with default implementations for queue and taskmaster.
@@ -180,4 +180,9 @@ public abstract class AbstractDroid<T ex
         logger.debug("finished task: " + task.getURI());
     }
 
+    @Override
+    public Queue<T> getQueue() {
+        return queue;
+    }
+
 }
\ No newline at end of file

Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java Wed Apr  3 08:07:28 2013
@@ -17,6 +17,8 @@
 package org.apache.droids.core;
 
 
+import java.util.Queue;
+
 /**
 * Interface for a droid. Droid can be seen as a "project manager" that delegates
  * the work to {@link Worker} units.
@@ -101,4 +103,11 @@ public interface Droid<T extends Task> {
      */
     public T filter(T task);
 
+    /**
+     * The queue used to track the tasks
+     *
+     * @return a queue of tasks
+     */
+    public Queue<T> getQueue();
+
 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java Wed Apr  3 08:07:28 2013
@@ -25,12 +25,9 @@ import org.apache.droids.core.LinkedTask
 import org.apache.droids.core.TaskMaster;
 import org.apache.droids.core.Worker;
 import org.apache.droids.fetcher.CrawlingFetcher;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 public abstract class CrawlingDroid extends AbstractDroid<LinkedTask> {
-    protected final Logger logger = LoggerFactory.getLogger(CrawlingDroid.class);
-    protected Collection<String> initialLocations;
+    private Collection<String> initialLocations;
 
     /**
      * CrawlingDroid. Abstract helper class.
@@ -52,6 +49,10 @@ public abstract class CrawlingDroid exte
         this.initialLocations = initialLocations;
     }
 
+    public Collection<String> getInitialLocations() {
+        return initialLocations;
+    }
+
     /**
      * @see org.apache.droids.core.Droid#getNewWorker()
      * @return Worker<Link>

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java Wed Apr  3 08:07:28 2013
@@ -22,17 +22,18 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.Queue;
 
-import com.google.common.base.Preconditions;
-
 import org.apache.droids.core.DroidsException;
 import org.apache.droids.core.LinkedTask;
 import org.apache.droids.core.TaskMaster;
 import org.apache.droids.core.Worker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This simple CrawlingDroid uses the ReportHandler to handle all retrieved files.
  */
 public class SimpleCrawlingDroid extends CrawlingDroid {
+    private static final Logger LOGGER = LoggerFactory.getLogger(SimpleCrawlingDroid.class);
 
     public SimpleCrawlingDroid(Queue<LinkedTask> queue, TaskMaster<LinkedTask> taskMaster) {
         super(queue, taskMaster);
@@ -40,17 +41,23 @@ public class SimpleCrawlingDroid extends
 
     @Override
     public void start() throws DroidsException {
-        Preconditions.checkState(initialLocations != null || !initialLocations.isEmpty(),
-                "CrawlingDroid requires at least one starting file");
-        for (String location : initialLocations) {
-            try {
-                URI uri = new URI(location);
-                queue.offer(new CrawlerTask(uri));
-            } catch (URISyntaxException ex) {
-                logger.error(ex.getMessage());
+        if (getInitialLocations() != null) {
+            if (!getInitialLocations().isEmpty()) {
+                for (String location : getInitialLocations()) {
+                    try {
+                        URI uri = new URI(location);
+                        getQueue().offer(new CrawlerTask(uri));
+                    } catch (URISyntaxException ex) {
+                        LOGGER.error(ex.getMessage());
+                    }
+                }
+                super.start();
+            } else {
+                throw new DroidsException("CrawlingDroid requires at least one starting file");
             }
+        } else {
+            throw new DroidsException("CrawlingDroid requires at least one starting file");
         }
-        super.start();
     }
 
     @Override

Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java Wed Apr  3 08:07:28 2013
@@ -111,8 +111,6 @@ public class CrawlingFetcher implements 
                 ContentEntity contentEntity = task.getContentEntity();
                 InputStream instream = entity.getContent();
                 contentEntity.setContent(instream);
-                // not used
-                //Map<String, String> headerData = new HashMap<String, String>();
                 for (Header header : response.getAllHeaders()) {
                     contentEntity.set(header.getName(), header.getValue());
                 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java Wed Apr  3 08:07:28 2013
@@ -67,10 +67,11 @@ public class NekoHtmlParser<T extends Ta
         this(new HashMap<String, String>());
     }
 
-    public NekoHtmlParser(HashMap<String, String> selectors) {
+    public NekoHtmlParser(Map<String, String> selectors) {
         this.patterns =  new HashMap<String, Pattern>();
-        setSelectors(selectors);
-        if (parser == null) initParser();
+        this.selectors = selectors;
+        initPatterns();
+        initParser();
     }
 
 
@@ -108,7 +109,7 @@ public class NekoHtmlParser<T extends Ta
      *
      * @param selectors Map of selectors
      */
-    public void setSelectors(HashMap<String, String> selectors) {
+    public void setSelectors(Map<String, String> selectors) {
         this.selectors = selectors;
         initPatterns();
     }
@@ -242,13 +243,13 @@ public class NekoHtmlParser<T extends Ta
             }
 
             // if the path matches a pattern, starts recording the matching content.
-            String path = getCurrentPath();
+            String currentPath = getCurrentPath();
             Iterator<Entry<String, Pattern>> entries = patterns.entrySet().iterator();
             while (entries.hasNext()) {
                 Entry<String, Pattern> entry = entries.next();
                 String patternName = entry.getKey();
                 Pattern patternValue = entry.getValue();
-                Matcher matcher = patternValue.matcher(path);
+                Matcher matcher = patternValue.matcher(currentPath);
                 if (matcher.find()) {
                     valueRecorders.put(patternName, "");
                 }
@@ -271,13 +272,13 @@ public class NekoHtmlParser<T extends Ta
             level--;
 
             // if the path matches a selector, stores the matching content.
-            String path = getCurrentPath();
+            String currentPath = getCurrentPath();
             Iterator<Entry<String, Pattern>> entries = patterns.entrySet().iterator();
             while (entries.hasNext()) {
                 Entry<String, Pattern> entry = entries.next();
                 String patternName = entry.getKey();
                 Pattern patternValue = entry.getValue();
-                Matcher matcher = patternValue.matcher(path);
+                Matcher matcher = patternValue.matcher(currentPath);
                 if (matcher.find()) {
                     // add the matching content to the solr document.
                     String value = valueRecorders.remove(patternName);

Modified: incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java Wed Apr  3 08:07:28 2013
@@ -27,7 +27,7 @@ import java.io.IOException;
 public class FileWorker implements Worker<FileTask> {
 
     private final Logger logger = LoggerFactory.getLogger(FileWorker.class);
-    final WalkingDroid droid;
+    private final WalkingDroid droid;
 
     public FileWorker(WalkingDroid droid) {
         this.droid = droid;
@@ -43,8 +43,9 @@ public class FileWorker implements Worke
                     for (File f : files) {
                         FileTask newTask =
                             droid.filter(new FileTask(f, task.getDepth() + 1));
-                        if (newTask != null)
+                        if (newTask != null) {
                             droid.add(newTask);
+                        }
                     }
                 }
             } else {

Modified: incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java Wed Apr  3 08:07:28 2013
@@ -16,19 +16,13 @@
  */
 package org.apache.droids.walker;
 
-import com.google.common.base.Preconditions;
-import org.apache.droids.core.AbstractDroid;
 import org.apache.droids.core.DroidsException;
 import org.apache.droids.core.TaskMaster;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.File;
-import java.util.Collection;
 import java.util.Queue;
 
 public class SimpleWalkingDroid extends WalkingDroid {
-    private static Logger logger = LoggerFactory.getLogger(SimpleWalkingDroid.class);
 
     public SimpleWalkingDroid(Queue<FileTask> queue, TaskMaster<FileTask> taskMaster) {
         super(queue, taskMaster);
@@ -36,14 +30,16 @@ public class SimpleWalkingDroid extends 
 
     @Override
     public void start() throws DroidsException {
-        Preconditions.checkState(initialFiles != null,
-                "FileWalker requires at least one starting file");
-        Preconditions.checkState(!initialFiles.isEmpty(),
-                "FileWalker requires at least one starting file");
-        for (File file : initialFiles) {
-            queue.add(new FileTask(file, 0));
+        if (getInitialFiles() != null) {
+            if (!getInitialFiles().isEmpty()) {
+                for (File file : getInitialFiles()) {
+                    getQueue().add(new FileTask(file, 0));
+                }
+                super.start();
+            }
+        } else {
+            throw new DroidsException( "FileWalker requires at least one starting file");
         }
-        super.start();
     }
 
 }

Modified: incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java Wed Apr  3 08:07:28 2013
@@ -24,7 +24,7 @@ import org.apache.droids.core.AbstractDr
 import org.apache.droids.core.TaskMaster;
 
 public abstract class WalkingDroid extends AbstractDroid<FileTask> {
-    protected Collection<File> initialFiles;
+    private Collection<File> initialFiles;
 
     public WalkingDroid() {
         super();
@@ -40,6 +40,10 @@ public abstract class WalkingDroid exten
             this.initialFiles = initialFiles;
     }
 
+    public Collection<File> getInitialFiles() {
+        return initialFiles;
+    }
+
     @Override
     public FileWorker getNewWorker() {
         return new FileWorker(this);