You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by ot...@apache.org on 2002/09/15 21:30:41 UTC
cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net HostInfo.java
otis 2002/09/15 12:30:40
Modified: contributions/webcrawler-LARM/src/de/lanlab/larm/net
HostInfo.java
Log:
- Moved constructor up, made attributes private, added a FIXME and a TODO.
Revision Changes Path
1.2 +32 -28 jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HostInfo.java
Index: HostInfo.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HostInfo.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- HostInfo.java 17 Jun 2002 14:00:13 -0000 1.1
+++ HostInfo.java 15 Sep 2002 19:30:40 -0000 1.2
@@ -52,6 +52,7 @@
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
+
package de.lanlab.larm.net;
import java.util.HashMap;
@@ -62,8 +63,11 @@
import de.lanlab.larm.fetcher.Message;
/**
- * contains information about a host. If a host doesn't respond too often, it's
- * excluded from the crawl. This class is used by the HostManager
+ * Contains information about a host. If a host fails to respond too many times,
+ * it is excluded from the crawl. This class is used by the HostManager.
+ * TODO: there needs to be a way to re-include the host in the crawl. Perhaps
+ * all hosts marked as unhealthy should be checked periodically and marked
+ * healthy again, if they respond.
*
* @author Clemens Marschner
* @created 16. Februar 2002
@@ -73,24 +77,40 @@
{
final static String[] emptyKeepOutDirectories = new String[0];
- int id;
+ private int id;
- int healthyCount = 5;
+ private int healthyCount = 5;
// five strikes, and you're out
- boolean isReachable = true;
+ private boolean isReachable = true;
- boolean robotTxtChecked = false;
+ private boolean robotTxtChecked = false;
- String[] disallows;
+ private String[] disallows;
// robot exclusion
- boolean isLoadingRobotsTxt = false;
+ private boolean isLoadingRobotsTxt = false;
- Queue queuedRequests = null;
+ private Queue queuedRequests = null;
// robot exclusion
- String hostName;
+ private String hostName;
+
+
+ //LinkedList synonyms = new LinkedList();
+
+ /**
+ * Creates the record for one host; its disallow list starts out empty.
+ *
+ * @param hostName name of the host this object describes
+ * @param id numeric id stored for this host
+ */
+ public HostInfo(String hostName, int id)
+ {
+ this.id = id;
+ this.disallows = HostInfo.emptyKeepOutDirectories; // shared zero-length array
+ this.hostName = hostName;
+ }
/**
@@ -157,22 +177,6 @@
}
- //LinkedList synonyms = new LinkedList();
-
- /**
- * Constructor for the HostInfo object
- *
- * @param hostName Description of the Parameter
- * @param id Description of the Parameter
- */
- public HostInfo(String hostName, int id)
- {
- this.id = id;
- this.disallows = HostInfo.emptyKeepOutDirectories;
- this.hostName = hostName;
- }
-
-
/**
* is this host reachable and responding?
*
@@ -247,6 +251,7 @@
this.isLoadingRobotsTxt = isLoading;
if (isLoading)
{
+ // FIXME: move '100' to properties
this.queuedRequests = new CachingQueue("HostInfo_" + id + "_QueuedRequests", 100);
}
@@ -294,5 +299,4 @@
}
return true;
}
-
}
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>