You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by ot...@apache.org on 2002/09/14 20:51:49 UTC

cvs commit: jakarta-lucene-sandbox/contributions/webcrawler-LARM run.sh

otis        2002/09/14 11:51:49

  Modified:    contributions/webcrawler-LARM run.sh
  Log:
  - Modified to make it usable.  This way we don't have to use Ant to run LARM.
  
  Revision  Changes    Path
  1.3       +40 -3     jakarta-lucene-sandbox/contributions/webcrawler-LARM/run.sh
  
  Index: run.sh
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/contributions/webcrawler-LARM/run.sh,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- run.sh	22 May 2002 23:09:16 -0000	1.2
  +++ run.sh	14 Sep 2002 18:51:49 -0000	1.3
  @@ -1,4 +1,41 @@
   #!/bin/sh
  -rm -r logs
  -mkdir logs
  -java -server -Xmx400mb -classpath classes:libs/jakarta-oro-2.0.5.jar de.lanlab.larm.fetcher.FetcherMain -start http://your.server.here/ -restrictto http://[^/]*\.your\.server\.here.* -threads 15  
  +
  +#
  +# $Id$
  +# Recreates the runtime log/cache directories, then starts LARM without Ant.
  +#
  +
  +BASE_DIR=./runtime
  +LOG_DIR=$BASE_DIR/logs
  +CACHE_DIR=$BASE_DIR/cachingqueue
  +CLASSPATH=build/classes:libs/jakarta-oro-2.0.5.jar:libs/HTTPClient.zip:/usr/local/jakarta-lucene/lucene.jar
  +SLEEP_TIME=2
  +
  +if [ $# -lt 4 ]
  +then
  +    echo "Usage: $(basename "$0") <start url> <scope regex> <# threads> <max mem>" >&2
  +    exit 1
  +fi
  +
  +START_URL=$1
  +SCOPE_REGEX=$2
  +THREAD_COUNT=$3
  +MAX_MEM=$4
  +
  +for dir in "$LOG_DIR" "$CACHE_DIR"
  +do
  +    echo "Removing $dir..."
  +    sleep "$SLEEP_TIME"
  +    rm -rf "$dir"    # -f: stay quiet when the directory does not exist yet (first run)
  +done
  +for dir in "$LOG_DIR" "$CACHE_DIR"
  +do
  +    echo "Creating $dir"
  +    sleep "$SLEEP_TIME"
  +    mkdir -p "$dir"
  +done
  +
  +# Build the command as positional parameters so the scope regex (*, [, ?) is never glob-expanded or word-split.
  +set -- java -server "-Xmx$MAX_MEM" -classpath "$CLASSPATH" de.lanlab.larm.fetcher.FetcherMain -start "$START_URL" -restrictto "$SCOPE_REGEX" -threads "$THREAD_COUNT"
  +echo "Starting LARM with: $*"
  +"$@"
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>