You are viewing a plain text version of this content. The canonical link for it was a hyperlink in the original archive page and is not preserved in this plain text version.
Posted to dev@lucene.apache.org by cm...@apache.org on 2003/04/11 15:43:41 UTC
cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher FetcherMain.java
cmarschner 2003/04/11 06:43:41
Modified: contributions/webcrawler-LARM CHANGES.txt
contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher
FetcherMain.java
Removed: contributions/webcrawler-LARM build.properties.sample
build.sh run.bat run.sh
Log:
fixed build issues
Revision Changes Path
1.4 +4 -1 jakarta-lucene-sandbox/contributions/webcrawler-LARM/CHANGES.txt
Index: CHANGES.txt
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/CHANGES.txt,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- CHANGES.txt 18 Jun 2002 00:49:57 -0000 1.3
+++ CHANGES.txt 11 Apr 2003 13:43:40 -0000 1.4
@@ -1,5 +1,8 @@
$Id$
+2003-04-11 (cmarschner)
+ * fixed build issues
+
2002-06-18 (cmarschner)
* added an experimental version of Lucene storage. see FetcherMain.java for details how to use it
LuceneStorage simply saves all fields as specified in WebDocument. add a converter to the
1.8 +12 -6 jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java
Index: FetcherMain.java
===================================================================
RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/src/de/lanlab/larm/fetcher/FetcherMain.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- FetcherMain.java 22 Oct 2002 15:05:07 -0000 1.7
+++ FetcherMain.java 11 Apr 2003 13:43:41 -0000 1.8
@@ -65,7 +65,6 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
-import javax.swing.UIManager;
/**
@@ -206,7 +205,7 @@
// file number, the offset within that file, and the document's length
// FIXME: default constructor for all storages + bean access methods
- storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ false,
+ storage.addDocStorage(new LogStorage(storeLog, /* save in page files? */ true,
/* page file prefix */ "logs/pagefile"));
storage.addLinkStorage(new LinkLogStorage(linksLog));
storage.addLinkStorage(messageHandler);
@@ -234,7 +233,10 @@
// dnsResolver = new DNSResolver();
hostManager = new HostManager(1000);
hostResolver = new HostResolver();
- hostResolver.initFromFile(hostResolverFile);
+ if(hostResolverFile != null && !"".equals(hostResolverFile))
+ {
+ hostResolver.initFromFile(hostResolverFile);
+ }
hostManager.setHostResolver(hostResolver);
// hostManager.addSynonym("www.fachsprachen.uni-muenchen.de", "www.fremdsprachen.uni-muenchen.de");
@@ -248,6 +250,10 @@
fetcher = new Fetcher(nrThreads, storage, storage, hostManager);
+ urlLengthFilter = new URLLengthFilter(500, lengthLog);
+
+ //knownPathsFilter = new KnownPathsFilter()
+
// prevent message box popups
HTTPConnection.setDefaultAllowUserInteraction(false);
@@ -278,7 +284,7 @@
messageHandler.addListener(urlScopeFilter);
messageHandler.addListener(reFilter);
messageHandler.addListener(urlVisitedFilter);
- messageHandler.addListener(knownPathsFilter);
+ //messageHandler.addListener(knownPathsFilter);
messageHandler.addListener(fetcher);
@@ -484,7 +490,7 @@
// replaced by HTTPClient
FetcherMain f = new FetcherMain(nrThreads, hostResolverFile);
- if (showInfo || "".equals(hostResolverFile) || (startURLs.isEmpty() && gui == false))
+ if (showInfo || (startURLs.isEmpty() && gui == false))
{
System.out.println("The LARM crawler\n" +
"\n" +
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org