You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by cm...@apache.org on 2003/04/11 15:43:41 UTC

cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher FetcherMain.java

cmarschner    2003/04/11 06:43:41

  Modified:    contributions/webcrawler-LARM CHANGES.txt
               contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher
                        FetcherMain.java
  Removed:     contributions/webcrawler-LARM build.properties.sample
                        build.sh run.bat run.sh
  Log:
  fixed build issues
  
  Revision  Changes    Path
  1.4       +4 -1      jakarta-lucene-sandbox/contributions/webcrawler-LARM/CHANGES.txt
  
  Index: CHANGES.txt
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/CHANGES.txt,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- CHANGES.txt	18 Jun 2002 00:49:57 -0000	1.3
  +++ CHANGES.txt	11 Apr 2003 13:43:40 -0000	1.4
  @@ -1,5 +1,8 @@
   $Id$
   
  +2003-04-11 (cmarschner)
  +	* fixed build issues
  +
   2002-06-18 (cmarschner)
   	* added an experimental version of Lucene storage. see FetcherMain.java for details how to use it
   	  LuceneStorage simply saves all fields as specified in WebDocument. add a converter to the 
  
  
  
  1.8       +12 -6     jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java
  
  Index: FetcherMain.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- FetcherMain.java	22 Oct 2002 15:05:07 -0000	1.7
  +++ FetcherMain.java	11 Apr 2003 13:43:41 -0000	1.8
  @@ -65,7 +65,6 @@
   import java.net.MalformedURLException;
   import java.net.URL;
   import java.util.*;
  -import javax.swing.UIManager;
   
   
   /**
  @@ -206,7 +205,7 @@
           // file number, the offset within that file, and the document's length
   
           // FIXME: default constructor for all storages + bean access methods
  -        storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ false,
  +        storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ true,
                                                /* page file prefix */ "logs/pagefile"));
           storage.addLinkStorage(new LinkLogStorage(linksLog));
           storage.addLinkStorage(messageHandler);
  @@ -234,7 +233,10 @@
           // dnsResolver = new DNSResolver();
           hostManager = new HostManager(1000);
           hostResolver = new HostResolver();
  -        hostResolver.initFromFile(hostResolverFile);
  +        if(hostResolverFile != null && !"".equals(hostResolverFile))
  +        {
  +            hostResolver.initFromFile(hostResolverFile);
  +        }
           hostManager.setHostResolver(hostResolver);
   
   //        hostManager.addSynonym("www.fachsprachen.uni-muenchen.de", "www.fremdsprachen.uni-muenchen.de");
  @@ -248,6 +250,10 @@
   
           fetcher = new Fetcher(nrThreads, storage, storage, hostManager);
   
  +        urlLengthFilter = new URLLengthFilter(500, lengthLog);
  +        
  +        //knownPathsFilter = new KnownPathsFilter()
  +        
           // prevent message box popups
           HTTPConnection.setDefaultAllowUserInteraction(false);
   
  @@ -278,7 +284,7 @@
           messageHandler.addListener(urlScopeFilter);
           messageHandler.addListener(reFilter);
           messageHandler.addListener(urlVisitedFilter);
  -        messageHandler.addListener(knownPathsFilter);
  +        //messageHandler.addListener(knownPathsFilter);
   
           messageHandler.addListener(fetcher);
   
  @@ -484,7 +490,7 @@
           // replaced by HTTPClient
   
           FetcherMain f = new FetcherMain(nrThreads, hostResolverFile);
  -        if (showInfo || "".equals(hostResolverFile) || (startURLs.isEmpty() && gui == false))
  +        if (showInfo || (startURLs.isEmpty() && gui == false))
           {
               System.out.println("The LARM crawler\n" +
                                  "\n" +
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org