You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by ot...@apache.org on 2002/09/15 21:30:41 UTC

cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net HostInfo.java

otis        2002/09/15 12:30:40

  Modified:    contributions/webcrawler-LARM/src/de/lanlab/larm/net
                        HostInfo.java
  Log:
  - Moved constructor up, made attributes private, added a FIXME and a TODO.
  
  Revision  Changes    Path
  1.2       +32 -28    jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HostInfo.java
  
  Index: HostInfo.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/net/HostInfo.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- HostInfo.java	17 Jun 2002 14:00:13 -0000	1.1
  +++ HostInfo.java	15 Sep 2002 19:30:40 -0000	1.2
  @@ -52,6 +52,7 @@
    *  information on the Apache Software Foundation, please see
    *  <http://www.apache.org/>.
    */
  +
   package de.lanlab.larm.net;
   
   import java.util.HashMap;
  @@ -62,8 +63,11 @@
   import de.lanlab.larm.fetcher.Message;
   
   /**
  - * contains information about a host. If a host doesn't respond too often, it's
  - * excluded from the crawl. This class is used by the HostManager
  + * Contains information about a host. If a host fails to respond too many times,
  + * it's excluded from the crawl. This class is used by the HostManager.
  + * TODO: there needs to be a way to re-include the host in the crawl.  Perhaps
  + * all hosts marked as unhealthy should be checked periodically and marked
  + * healthy again, if they respond.
    *
    * @author    Clemens Marschner
    * @created   16. Februar 2002
  @@ -73,24 +77,40 @@
   {
       final static String[] emptyKeepOutDirectories = new String[0];
   
  -    int id;
  +    private int id;
   
  -    int healthyCount = 5;
  +    private int healthyCount = 5;
   
       // five strikes, and you're out
  -    boolean isReachable = true;
  +    private boolean isReachable = true;
   
  -    boolean robotTxtChecked = false;
  +    private boolean robotTxtChecked = false;
   
  -    String[] disallows;
  +    private String[] disallows;
   
       // robot exclusion
  -    boolean isLoadingRobotsTxt = false;
  +    private boolean isLoadingRobotsTxt = false;
   
  -    Queue queuedRequests = null;
  +    private Queue queuedRequests = null;
   
       // robot exclusion
  -    String hostName;
  +    private String hostName;
  +
  +
  +    //LinkedList synonyms = new LinkedList();
  +
  +    /**
  +     * Constructor for the HostInfo object
  +     *
  +     * @param hostName  Description of the Parameter
  +     * @param id        Description of the Parameter
  +     */
  +    public HostInfo(String hostName, int id)
  +    {
  +        this.id = id;
  +        this.disallows = HostInfo.emptyKeepOutDirectories;
  +        this.hostName = hostName;
  +    }
   
   
       /**
  @@ -157,22 +177,6 @@
       }
   
   
  -    //LinkedList synonyms = new LinkedList();
  -
  -    /**
  -     * Constructor for the HostInfo object
  -     *
  -     * @param hostName  Description of the Parameter
  -     * @param id        Description of the Parameter
  -     */
  -    public HostInfo(String hostName, int id)
  -    {
  -        this.id = id;
  -        this.disallows = HostInfo.emptyKeepOutDirectories;
  -        this.hostName = hostName;
  -    }
  -
  -
       /**
        * is this host reachable and responding?
        *
  @@ -247,6 +251,7 @@
           this.isLoadingRobotsTxt = isLoading;
           if (isLoading)
           {
  +	    // FIXME: move '100' to properties
               this.queuedRequests = new CachingQueue("HostInfo_" + id + "_QueuedRequests", 100);
           }
   
  @@ -294,5 +299,4 @@
           }
           return true;
       }
  -
   }
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>