You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by to...@apache.org on 2013/04/03 08:07:28 UTC
svn commit: r1463870 - in /incubator/droids/branches/0.2.x-cleanup:
droids-core/src/main/java/org/apache/droids/core/
droids-crawler/src/main/java/org/apache/droids/crawler/
droids-crawler/src/main/java/org/apache/droids/fetcher/
droids-nekohtml/src/ma...
Author: tobr
Date: Wed Apr 3 08:07:28 2013
New Revision: 1463870
URL: http://svn.apache.org/r1463870
Log:
Cleaned up the code.
Added getQueue() to the Droid interface.
Changed members to private and created getters/setters.
Removed:
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/protocol/
Modified:
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java
incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/AbstractDroid.java Wed Apr 3 08:07:28 2013
@@ -33,14 +33,14 @@ import org.slf4j.LoggerFactory;
* @version 1.0
*/
public abstract class AbstractDroid<T extends Task> implements Droid<T> {
- protected final Queue<T> queue;
- protected final TaskMaster<T> taskMaster;
- protected Fetcher<T> fetcher;
- protected ParserFactory parserFactory;
- protected FilterFactory filterFactory;
- protected HandlerFactory handlerFactory;
+ private final Queue<T> queue;
+ private final TaskMaster<T> taskMaster;
+ private final ParserFactory parserFactory;
+ private final FilterFactory filterFactory;
+ private final HandlerFactory handlerFactory;
+ private Fetcher<T> fetcher;
- protected final static Logger logger = LoggerFactory.getLogger(AbstractDroid.class);
+ private final static Logger logger = LoggerFactory.getLogger(AbstractDroid.class);
/**
* Constructor with default implementations for queue and taskmaster.
@@ -180,4 +180,9 @@ public abstract class AbstractDroid<T ex
logger.debug("finished task: " + task.getURI());
}
+ @Override
+ public Queue<T> getQueue() {
+ return queue;
+ }
+
}
\ No newline at end of file
Modified: incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-core/src/main/java/org/apache/droids/core/Droid.java Wed Apr 3 08:07:28 2013
@@ -17,6 +17,8 @@
package org.apache.droids.core;
+import java.util.Queue;
+
/**
* Interface for a droid. Droid can be seen as a "project manger" that delegates
* the work to {@link Worker} units.
@@ -101,4 +103,11 @@ public interface Droid<T extends Task> {
*/
public T filter(T task);
+ /**
+ * The queue used to track the tasks
+ *
+ * @return a queue of tasks
+ */
+ public Queue<T> getQueue();
+
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/CrawlingDroid.java Wed Apr 3 08:07:28 2013
@@ -25,12 +25,9 @@ import org.apache.droids.core.LinkedTask
import org.apache.droids.core.TaskMaster;
import org.apache.droids.core.Worker;
import org.apache.droids.fetcher.CrawlingFetcher;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
public abstract class CrawlingDroid extends AbstractDroid<LinkedTask> {
- protected final Logger logger = LoggerFactory.getLogger(CrawlingDroid.class);
- protected Collection<String> initialLocations;
+ private Collection<String> initialLocations;
/**
* CrawlingDroid. Abstract helper class.
@@ -52,6 +49,10 @@ public abstract class CrawlingDroid exte
this.initialLocations = initialLocations;
}
+ public Collection<String> getInitialLocations() {
+ return initialLocations;
+ }
+
/**
* @see org.apache.droids.core.Droid#getNewWorker()
* @return Worker<Link>
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/crawler/SimpleCrawlingDroid.java Wed Apr 3 08:07:28 2013
@@ -22,17 +22,18 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.util.Queue;
-import com.google.common.base.Preconditions;
-
import org.apache.droids.core.DroidsException;
import org.apache.droids.core.LinkedTask;
import org.apache.droids.core.TaskMaster;
import org.apache.droids.core.Worker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* This simple CrawlingDroid uses the ReportHandler to handle all retrieved files.
*/
public class SimpleCrawlingDroid extends CrawlingDroid {
+ private static final Logger LOGGER = LoggerFactory.getLogger(SimpleCrawlingDroid.class);
public SimpleCrawlingDroid(Queue<LinkedTask> queue, TaskMaster<LinkedTask> taskMaster) {
super(queue, taskMaster);
@@ -40,17 +41,23 @@ public class SimpleCrawlingDroid extends
@Override
public void start() throws DroidsException {
- Preconditions.checkState(initialLocations != null || !initialLocations.isEmpty(),
- "CrawlingDroid requires at least one starting file");
- for (String location : initialLocations) {
- try {
- URI uri = new URI(location);
- queue.offer(new CrawlerTask(uri));
- } catch (URISyntaxException ex) {
- logger.error(ex.getMessage());
+ if (getInitialLocations() != null) {
+ if (!getInitialLocations().isEmpty()) {
+ for (String location : getInitialLocations()) {
+ try {
+ URI uri = new URI(location);
+ getQueue().offer(new CrawlerTask(uri));
+ } catch (URISyntaxException ex) {
+ LOGGER.error(ex.getMessage());
+ }
+ }
+ super.start();
+ } else {
+ throw new DroidsException("CrawlingDroid requires at least one starting file");
}
+ } else {
+ throw new DroidsException("CrawlingDroid requires at least one starting file");
}
- super.start();
}
@Override
Modified: incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-crawler/src/main/java/org/apache/droids/fetcher/CrawlingFetcher.java Wed Apr 3 08:07:28 2013
@@ -111,8 +111,6 @@ public class CrawlingFetcher implements
ContentEntity contentEntity = task.getContentEntity();
InputStream instream = entity.getContent();
contentEntity.setContent(instream);
- // not used
- //Map<String, String> headerData = new HashMap<String, String>();
for (Header header : response.getAllHeaders()) {
contentEntity.set(header.getName(), header.getValue());
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-nekohtml/src/main/java/org/apache/droids/nekohtml/NekoHtmlParser.java Wed Apr 3 08:07:28 2013
@@ -67,10 +67,11 @@ public class NekoHtmlParser<T extends Ta
this(new HashMap<String, String>());
}
- public NekoHtmlParser(HashMap<String, String> selectors) {
+ public NekoHtmlParser(Map<String, String> selectors) {
this.patterns = new HashMap<String, Pattern>();
- setSelectors(selectors);
- if (parser == null) initParser();
+ this.selectors = selectors;
+ initPatterns();
+ initParser();
}
@@ -108,7 +109,7 @@ public class NekoHtmlParser<T extends Ta
*
* @param selectors Map of selectors
*/
- public void setSelectors(HashMap<String, String> selectors) {
+ public void setSelectors(Map<String, String> selectors) {
this.selectors = selectors;
initPatterns();
}
@@ -242,13 +243,13 @@ public class NekoHtmlParser<T extends Ta
}
// if the path matches a pattern, starts recording the matching content.
- String path = getCurrentPath();
+ String currentPath = getCurrentPath();
Iterator<Entry<String, Pattern>> entries = patterns.entrySet().iterator();
while (entries.hasNext()) {
Entry<String, Pattern> entry = entries.next();
String patternName = entry.getKey();
Pattern patternValue = entry.getValue();
- Matcher matcher = patternValue.matcher(path);
+ Matcher matcher = patternValue.matcher(currentPath);
if (matcher.find()) {
valueRecorders.put(patternName, "");
}
@@ -271,13 +272,13 @@ public class NekoHtmlParser<T extends Ta
level--;
// if the path matches a selector, stores the matching content.
- String path = getCurrentPath();
+ String currentPath = getCurrentPath();
Iterator<Entry<String, Pattern>> entries = patterns.entrySet().iterator();
while (entries.hasNext()) {
Entry<String, Pattern> entry = entries.next();
String patternName = entry.getKey();
Pattern patternValue = entry.getValue();
- Matcher matcher = patternValue.matcher(path);
+ Matcher matcher = patternValue.matcher(currentPath);
if (matcher.find()) {
// add the matching content to the solr document.
String value = valueRecorders.remove(patternName);
Modified: incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/FileWorker.java Wed Apr 3 08:07:28 2013
@@ -27,7 +27,7 @@ import java.io.IOException;
public class FileWorker implements Worker<FileTask> {
private final Logger logger = LoggerFactory.getLogger(FileWorker.class);
- final WalkingDroid droid;
+ private final WalkingDroid droid;
public FileWorker(WalkingDroid droid) {
this.droid = droid;
@@ -43,8 +43,9 @@ public class FileWorker implements Worke
for (File f : files) {
FileTask newTask =
droid.filter(new FileTask(f, task.getDepth() + 1));
- if (newTask != null)
+ if (newTask != null) {
droid.add(newTask);
+ }
}
}
} else {
Modified: incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/SimpleWalkingDroid.java Wed Apr 3 08:07:28 2013
@@ -16,19 +16,13 @@
*/
package org.apache.droids.walker;
-import com.google.common.base.Preconditions;
-import org.apache.droids.core.AbstractDroid;
import org.apache.droids.core.DroidsException;
import org.apache.droids.core.TaskMaster;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.File;
-import java.util.Collection;
import java.util.Queue;
public class SimpleWalkingDroid extends WalkingDroid {
- private static Logger logger = LoggerFactory.getLogger(SimpleWalkingDroid.class);
public SimpleWalkingDroid(Queue<FileTask> queue, TaskMaster<FileTask> taskMaster) {
super(queue, taskMaster);
@@ -36,14 +30,16 @@ public class SimpleWalkingDroid extends
@Override
public void start() throws DroidsException {
- Preconditions.checkState(initialFiles != null,
- "FileWalker requires at least one starting file");
- Preconditions.checkState(!initialFiles.isEmpty(),
- "FileWalker requires at least one starting file");
- for (File file : initialFiles) {
- queue.add(new FileTask(file, 0));
+ if (getInitialFiles() != null) {
+ if (!getInitialFiles().isEmpty()) {
+ for (File file : getInitialFiles()) {
+ getQueue().add(new FileTask(file, 0));
+ }
+ super.start();
+ }
+ } else {
+ throw new DroidsException( "FileWalker requires at least one starting file");
}
- super.start();
}
}
Modified: incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java?rev=1463870&r1=1463869&r2=1463870&view=diff
==============================================================================
--- incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java (original)
+++ incubator/droids/branches/0.2.x-cleanup/droids-walker/src/main/java/org/apache/droids/walker/WalkingDroid.java Wed Apr 3 08:07:28 2013
@@ -24,7 +24,7 @@ import org.apache.droids.core.AbstractDr
import org.apache.droids.core.TaskMaster;
public abstract class WalkingDroid extends AbstractDroid<FileTask> {
- protected Collection<File> initialFiles;
+ private Collection<File> initialFiles;
public WalkingDroid() {
super();
@@ -40,6 +40,10 @@ public abstract class WalkingDroid exten
this.initialFiles = initialFiles;
}
+ public Collection<File> getInitialFiles() {
+ return initialFiles;
+ }
+
@Override
public FileWorker getNewWorker() {
return new FileWorker(this);