You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/07/05 23:13:20 UTC
svn commit: r1608135 - in /nutch: branches/2.x/CHANGES.txt
branches/2.x/src/bin/crawl branches/2.x/src/bin/nutch trunk/CHANGES.txt
trunk/src/bin/crawl trunk/src/bin/nutch
Author: snagel
Date: Sat Jul 5 21:13:19 2014
New Revision: 1608135
URL: http://svn.apache.org/r1608135
Log:
NUTCH-1566 bin/nutch to allow whitespace in paths
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/bin/crawl
nutch/branches/2.x/src/bin/nutch
nutch/trunk/CHANGES.txt
nutch/trunk/src/bin/crawl
nutch/trunk/src/bin/nutch
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Jul 5 21:13:19 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel)
+
* NUTCH-1605 MIME type detector recognizes xlsx as zip file (snagel)
* NUTCH-385 Improve description of thread related configuration for Fetcher (jnioche,lufeng)
Modified: nutch/branches/2.x/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/crawl?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/crawl (original)
+++ nutch/branches/2.x/src/bin/crawl Sat Jul 5 21:13:19 2014
@@ -70,12 +70,12 @@ timeLimitFetch=180
addDays=0
#############################################
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
+bin="`dirname "$0"`"
+bin="`cd "$bin"; pwd`"
# determine the mode based on the presence of the job file
mode=local
-if [ -f ${bin}/../*nutch*.job ]; then
+if [ -f "${bin}"/../*nutch*.job ]; then
mode=distributed
fi
@@ -92,8 +92,7 @@ if [ $mode = "distributed" ]; then
fi
# initial injection
-$bin/nutch inject $SEEDDIR -crawlId $CRAWL_ID
-
+"$bin/nutch" inject "$SEEDDIR" -crawlId "$CRAWL_ID"
if [ $? -ne 0 ]
then exit $?
fi
@@ -114,14 +113,14 @@ do
batchId=`date +%s`-$RANDOM
echo "Generating a new fetchlist"
- $bin/nutch generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId $CRAWL_ID -batchId $batchId
+ "$bin/nutch" generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId "$CRAWL_ID" -batchId $batchId
if [ $? -ne 0 ]
then exit $?
fi
echo "Fetching : "
- $bin/nutch fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId $CRAWL_ID -threads 50
+ "$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId "$CRAWL_ID" -threads 50
if [ $? -ne 0 ]
then exit $?
@@ -132,7 +131,7 @@ do
# enable the skipping of records for the parsing so that a dodgy document
# does not fail the full task
skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
- $bin/nutch parse $commonOptions $skipRecordsOptions $batchId -crawlId $CRAWL_ID
+ "$bin/nutch" parse $commonOptions $skipRecordsOptions $batchId -crawlId "$CRAWL_ID"
if [ $? -ne 0 ]
then exit $?
@@ -140,21 +139,21 @@ do
# updatedb with this batch
echo "CrawlDB update for $CRAWL_ID"
- $bin/nutch updatedb $commonOptions $batchId -crawlId $CRAWL_ID
+ "$bin/nutch" updatedb $commonOptions $batchId -crawlId "$CRAWL_ID"
if [ $? -ne 0 ]
then exit $?
fi
echo "Indexing $CRAWL_ID on SOLR index -> $SOLRURL"
- $bin/nutch index $commonOptions -D solr.server.url=$SOLRURL -all -crawlId $CRAWL_ID
+ "$bin/nutch" index $commonOptions -D solr.server.url=$SOLRURL -all -crawlId "$CRAWL_ID"
if [ $? -ne 0 ]
then exit $?
fi
echo "SOLR dedup -> $SOLRURL"
- $bin/nutch solrdedup $commonOptions $SOLRURL
+ "$bin/nutch" solrdedup $commonOptions $SOLRURL
if [ $? -ne 0 ]
then exit $?
Modified: nutch/branches/2.x/src/bin/nutch
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/nutch?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/nutch (original)
+++ nutch/branches/2.x/src/bin/nutch Sat Jul 5 21:13:19 2014
@@ -25,6 +25,14 @@
# Default is 1000.
#
# NUTCH_OPTS Extra Java runtime options.
+# Multiple options must be separated by white space.
+#
+# NUTCH_LOG_DIR Log directory (default: $NUTCH_HOME/logs)
+#
+# NUTCH_LOGFILE Log file (default: hadoop.log)
+#
+# NUTCH_CONF_DIR Path(s) to configuration files (default: $NUTCH_HOME/conf).
+# Multiple paths must be separated by a colon ':'.
#
cygwin=false
case "`uname`" in
@@ -78,13 +86,13 @@ COMMAND=$1
shift
# some directories
-THIS_DIR=`dirname "$THIS"`
-NUTCH_HOME=`cd "$THIS_DIR/.." ; pwd`
+THIS_DIR="`dirname "$THIS"`"
+NUTCH_HOME="`cd "$THIS_DIR/.." ; pwd`"
# some Java parameters
if [ "$NUTCH_JAVA_HOME" != "" ]; then
#echo "run java in $NUTCH_JAVA_HOME"
- JAVA_HOME=$NUTCH_JAVA_HOME
+ JAVA_HOME="$NUTCH_JAVA_HOME"
fi
if [ "$JAVA_HOME" = "" ]; then
@@ -94,21 +102,20 @@ fi
# NUTCH_JOB
-if [ -f ${NUTCH_HOME}/*nutch*.job ]; then
- local=false
- for f in $NUTCH_HOME/*nutch*.job; do
- NUTCH_JOB=$f;
+if [ -f "${NUTCH_HOME}"/*nutch*.job ]; then
+ local=false
+ for f in "$NUTCH_HOME"/*nutch*.job; do
+ NUTCH_JOB="$f";
done
+ # cygwin path translation
+ if $cygwin; then
+ NUTCH_JOB="`cygpath -p -w "$NUTCH_JOB"`"
+ fi
else
local=true
fi
-# cygwin path translation
-if $cygwin; then
- NUTCH_JOB=`cygpath -p -w "$NUTCH_JOB"`
-fi
-
-JAVA=$JAVA_HOME/bin/java
+JAVA="$JAVA_HOME/bin/java"
JAVA_HEAP_MAX=-Xmx1000m
# check envvars which might override default args
@@ -119,46 +126,47 @@ if [ "$NUTCH_HEAPSIZE" != "" ]; then
fi
# CLASSPATH initially contains $NUTCH_CONF_DIR, or defaults to $NUTCH_HOME/conf
-CLASSPATH=${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}
-CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+CLASSPATH="${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}"
+CLASSPATH="${CLASSPATH}:$JAVA_HOME/lib/tools.jar"
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# add libs to CLASSPATH
if $local; then
- for f in $NUTCH_HOME/lib/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
+ for f in "$NUTCH_HOME"/lib/*.jar; do
+ CLASSPATH="${CLASSPATH}:$f";
done
# local runtime
# add plugins to classpath
if [ -d "$NUTCH_HOME/plugins" ]; then
- CLASSPATH=${NUTCH_HOME}:${CLASSPATH}
+ CLASSPATH="${NUTCH_HOME}:${CLASSPATH}"
fi
fi
# cygwin path translation
if $cygwin; then
- CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+ CLASSPATH="`cygpath -p -w "$CLASSPATH"`"
fi
# setup 'java.library.path' for native-hadoop code if necessary
# used only in local mode
JAVA_LIBRARY_PATH=''
if [ -d "${NUTCH_HOME}/lib/native" ]; then
- JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
-
+
+ JAVA_PLATFORM=`"${JAVA}" -classpath "$CLASSPATH" org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
+
if [ -d "${NUTCH_HOME}/lib/native" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+ JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
else
- JAVA_LIBRARY_PATH=${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+ JAVA_LIBRARY_PATH="${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
fi
fi
fi
if [ $cygwin = true -a "X${JAVA_LIBRARY_PATH}" != "X" ]; then
- JAVA_LIBRARY_PATH=`cygpath -p -w "$JAVA_LIBRARY_PATH"`
+ JAVA_LIBRARY_PATH="`cygpath -p -w "$JAVA_LIBRARY_PATH"`"
fi
# restore ordinary behaviour
@@ -174,14 +182,14 @@ fi
#Fix log path under cygwin
if $cygwin; then
- NUTCH_LOG_DIR=`cygpath -p -w "$NUTCH_LOG_DIR"`
+ NUTCH_LOG_DIR="`cygpath -p -w "$NUTCH_LOG_DIR"`"
fi
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.dir=$NUTCH_LOG_DIR"
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.file=$NUTCH_LOGFILE"
+NUTCH_OPTS=($NUTCH_OPTS -Dhadoop.log.dir="$NUTCH_LOG_DIR")
+NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Dhadoop.log.file="$NUTCH_LOGFILE")
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- NUTCH_OPTS="$NUTCH_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+ NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Djava.library.path="$JAVA_LIBRARY_PATH")
fi
# figure out which class to run
@@ -229,7 +237,7 @@ CLASS=org.apache.nutch.plugin.PluginRepo
elif [ "$COMMAND" = "nutchserver" ] ; then
CLASS=org.apache.nutch.api.NutchServer
elif [ "$COMMAND" = "junit" ] ; then
- CLASSPATH=$CLASSPATH:$NUTCH_HOME/test/classes/
+ CLASSPATH="$CLASSPATH:$NUTCH_HOME/test/classes/"
CLASS=junit.textui.TestRunner
else
CLASS=$COMMAND
@@ -238,8 +246,8 @@ fi
if $local; then
# fix for the external Xerces lib issue with SAXParserFactory
- NUTCH_OPTS="-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl $NUTCH_OPTS"
- EXEC_CALL="$JAVA $JAVA_HEAP_MAX $NUTCH_OPTS -classpath $CLASSPATH"
+ NUTCH_OPTS=(-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl "${NUTCH_OPTS[@]}")
+ EXEC_CALL=("$JAVA" $JAVA_HEAP_MAX "${NUTCH_OPTS[@]}" -classpath "$CLASSPATH")
else
# check that hadoop can be found on the path
if [ $(which hadoop | wc -l ) -eq 0 ]; then
@@ -247,9 +255,9 @@ else
exit -1;
fi
# distributed mode
- EXEC_CALL="hadoop jar $NUTCH_JOB"
+ EXEC_CALL=(hadoop jar "$NUTCH_JOB")
fi
# run it
-exec $EXEC_CALL $CLASS "$@"
+exec "${EXEC_CALL[@]}" $CLASS "$@"
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Jul 5 21:13:19 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development
+* NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel)
+
* NUTCH-1605 MIME type detector recognizes xlsx as zip file (snagel)
* NUTCH-1802 Move TestbedProxy to test environment (jnioche)
Modified: nutch/trunk/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Sat Jul 5 21:13:19 2014
@@ -69,12 +69,12 @@ numThreads=50
#############################################
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
+bin="`dirname "$0"`"
+bin="`cd "$bin"; pwd`"
# determine the mode based on the presence of the job file
mode=local
-if [ -f ${bin}/../*nutch*.job ]; then
+if [ -f "${bin}"/../*nutch*.job ]; then
mode=distributed
fi
@@ -91,7 +91,7 @@ if [ $mode = "distributed" ]; then
fi
# initial injection
-$bin/nutch inject $CRAWL_PATH/crawldb $SEEDDIR
+"$bin/nutch" inject "$CRAWL_PATH"/crawldb "$SEEDDIR"
if [ $? -ne 0 ]
then exit $?
@@ -110,7 +110,7 @@ do
echo `date` ": Iteration $a of $LIMIT"
echo "Generating a new segment"
- $bin/nutch generate $commonOptions $CRAWL_PATH/crawldb $CRAWL_PATH/segments -topN $sizeFetchlist -numFetchers $numSlaves -noFilter
+ "$bin/nutch" generate $commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $sizeFetchlist -numFetchers $numSlaves -noFilter
if [ $? -ne 0 ]
then exit $?
@@ -121,16 +121,16 @@ do
# or use ls
if [ $mode = "local" ]; then
- SEGMENT=`ls $CRAWL_PATH/segments/ | sort -n | tail -n 1`
+ SEGMENT=`ls "$CRAWL_PATH"/segments/ | sort -n | tail -n 1`
else
- SEGMENT=`hadoop fs -ls $CRAWL_PATH/segments/ | grep segments | sed -e "s/\//\\n/g" | egrep 20[0-9]+ | sort -n | tail -n 1`
+ SEGMENT=`hadoop fs -ls "$CRAWL_PATH"/segments/ | grep segments | sed -e "s/\//\\n/g" | egrep 20[0-9]+ | sort -n | tail -n 1`
fi
echo "Operating on segment : $SEGMENT"
# fetching the segment
echo "Fetching : $SEGMENT"
- $bin/nutch fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $CRAWL_PATH/segments/$SEGMENT -noParsing -threads $numThreads
+ "$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch "$CRAWL_PATH"/segments/$SEGMENT -noParsing -threads $numThreads
if [ $? -ne 0 ]
then exit $?
@@ -141,7 +141,7 @@ do
# enable the skipping of records for the parsing so that a dodgy document
# does not fail the full task
skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
- $bin/nutch parse $commonOptions $skipRecordsOptions $CRAWL_PATH/segments/$SEGMENT
+ "$bin/nutch" parse $commonOptions $skipRecordsOptions "$CRAWL_PATH"/segments/$SEGMENT
if [ $? -ne 0 ]
then exit $?
@@ -149,7 +149,7 @@ do
# updatedb with this segment
echo "CrawlDB update"
- $bin/nutch updatedb $commonOptions $CRAWL_PATH/crawldb $CRAWL_PATH/segments/$SEGMENT
+ "$bin/nutch" updatedb $commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments/$SEGMENT
if [ $? -ne 0 ]
then exit $?
@@ -158,7 +158,7 @@ do
# note that the link inversion - indexing routine can be done within the main loop
# on a per segment basis
echo "Link inversion"
- $bin/nutch invertlinks $CRAWL_PATH/linkdb $CRAWL_PATH/segments/$SEGMENT
+ "$bin/nutch" invertlinks "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
if [ $? -ne 0 ]
then exit $?
@@ -172,14 +172,14 @@ do
fi
echo "Indexing $SEGMENT on SOLR index -> $SOLRURL"
- $bin/nutch index -D solr.server.url=$SOLRURL $CRAWL_PATH/crawldb -linkdb $CRAWL_PATH/linkdb $CRAWL_PATH/segments/$SEGMENT
+ "$bin/nutch" index -D solr.server.url=$SOLRURL "$CRAWL_PATH"/crawldb -linkdb "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
if [ $? -ne 0 ]
then exit $?
fi
echo "Cleanup on SOLR index -> $SOLRURL"
- $bin/nutch clean -D solr.server.url=$SOLRURL $CRAWL_PATH/crawldb
+ "$bin/nutch" clean -D solr.server.url=$SOLRURL "$CRAWL_PATH"/crawldb
if [ $? -ne 0 ]
then exit $?
Modified: nutch/trunk/src/bin/nutch
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/nutch?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/trunk/src/bin/nutch (original)
+++ nutch/trunk/src/bin/nutch Sat Jul 5 21:13:19 2014
@@ -25,6 +25,14 @@
# Default is 1000.
#
# NUTCH_OPTS Extra Java runtime options.
+# Multiple options must be separated by white space.
+#
+# NUTCH_LOG_DIR Log directory (default: $NUTCH_HOME/logs)
+#
+# NUTCH_LOGFILE Log file (default: hadoop.log)
+#
+# NUTCH_CONF_DIR Path(s) to configuration files (default: $NUTCH_HOME/conf).
+# Multiple paths must be separated by a colon ':'.
#
cygwin=false
case "`uname`" in
@@ -86,13 +94,13 @@ COMMAND=$1
shift
# some directories
-THIS_DIR=`dirname "$THIS"`
-NUTCH_HOME=`cd "$THIS_DIR/.." ; pwd`
+THIS_DIR="`dirname "$THIS"`"
+NUTCH_HOME="`cd "$THIS_DIR/.." ; pwd`"
# some Java parameters
if [ "$NUTCH_JAVA_HOME" != "" ]; then
#echo "run java in $NUTCH_JAVA_HOME"
- JAVA_HOME=$NUTCH_JAVA_HOME
+ JAVA_HOME="$NUTCH_JAVA_HOME"
fi
if [ "$JAVA_HOME" = "" ]; then
@@ -103,19 +111,18 @@ fi
local=true
# NUTCH_JOB
-if [ -f ${NUTCH_HOME}/*nutch*.job ]; then
- local=false
- for f in $NUTCH_HOME/*nutch*.job; do
- NUTCH_JOB=$f;
+if [ -f "${NUTCH_HOME}"/*nutch*.job ]; then
+ local=false
+ for f in "$NUTCH_HOME"/*nutch*.job; do
+ NUTCH_JOB="$f"
done
+ # cygwin path translation
+ if $cygwin; then
+ NUTCH_JOB="`cygpath -p -w "$NUTCH_JOB"`"
+ fi
fi
-# cygwin path translation
-if $cygwin; then
- NUTCH_JOB=`cygpath -p -w "$NUTCH_JOB"`
-fi
-
-JAVA=$JAVA_HOME/bin/java
+JAVA="$JAVA_HOME/bin/java"
JAVA_HEAP_MAX=-Xmx1000m
# check envvars which might override default args
@@ -126,46 +133,47 @@ if [ "$NUTCH_HEAPSIZE" != "" ]; then
fi
# CLASSPATH initially contains $NUTCH_CONF_DIR, or defaults to $NUTCH_HOME/conf
-CLASSPATH=${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}
-CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+CLASSPATH="${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}"
+CLASSPATH="${CLASSPATH}:$JAVA_HOME/lib/tools.jar"
# so that filenames w/ spaces are handled correctly in loops below
IFS=
# add libs to CLASSPATH
if $local; then
- for f in $NUTCH_HOME/lib/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
+ for f in "$NUTCH_HOME"/lib/*.jar; do
+ CLASSPATH="${CLASSPATH}:$f";
done
# local runtime
# add plugins to classpath
if [ -d "$NUTCH_HOME/plugins" ]; then
- CLASSPATH=${NUTCH_HOME}:${CLASSPATH}
+ CLASSPATH="${NUTCH_HOME}:${CLASSPATH}"
fi
fi
# cygwin path translation
if $cygwin; then
- CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+ CLASSPATH="`cygpath -p -w "$CLASSPATH"`"
fi
# setup 'java.library.path' for native-hadoop code if necessary
# used only in local mode
JAVA_LIBRARY_PATH=''
if [ -d "${NUTCH_HOME}/lib/native" ]; then
- JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
-
+
+ JAVA_PLATFORM=`"${JAVA}" -classpath "$CLASSPATH" org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
+
if [ -d "${NUTCH_HOME}/lib/native" ]; then
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+ JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
else
- JAVA_LIBRARY_PATH=${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+ JAVA_LIBRARY_PATH="${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
fi
fi
fi
if [ $cygwin = true -a "X${JAVA_LIBRARY_PATH}" != "X" ]; then
- JAVA_LIBRARY_PATH=`cygpath -p -w "$JAVA_LIBRARY_PATH"`
+ JAVA_LIBRARY_PATH="`cygpath -p -w "$JAVA_LIBRARY_PATH"`"
fi
# restore ordinary behaviour
@@ -181,14 +189,14 @@ fi
#Fix log path under cygwin
if $cygwin; then
- NUTCH_LOG_DIR=`cygpath -p -w "$NUTCH_LOG_DIR"`
+ NUTCH_LOG_DIR="`cygpath -p -w "$NUTCH_LOG_DIR"`"
fi
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.dir=$NUTCH_LOG_DIR"
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.file=$NUTCH_LOGFILE"
+NUTCH_OPTS=($NUTCH_OPTS -Dhadoop.log.dir="$NUTCH_LOG_DIR")
+NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Dhadoop.log.file="$NUTCH_LOGFILE")
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
- NUTCH_OPTS="$NUTCH_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+ NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Djava.library.path="$JAVA_LIBRARY_PATH")
fi
# figure out which class to run
@@ -253,17 +261,17 @@ elif [ "$COMMAND" = "nodedumper" ] ; the
elif [ "$COMMAND" = "plugin" ] ; then
CLASS=org.apache.nutch.plugin.PluginRepository
elif [ "$COMMAND" = "junit" ] ; then
- CLASSPATH=$CLASSPATH:$NUTCH_HOME/test/classes/
+ CLASSPATH="$CLASSPATH:$NUTCH_HOME/test/classes/"
CLASS=junit.textui.TestRunner
else
CLASS=$COMMAND
fi
# distributed mode
-EXEC_CALL="hadoop jar $NUTCH_JOB"
+EXEC_CALL=(hadoop jar "$NUTCH_JOB")
if $local; then
- EXEC_CALL="$JAVA $JAVA_HEAP_MAX $NUTCH_OPTS -classpath $CLASSPATH"
+ EXEC_CALL=("$JAVA" $JAVA_HEAP_MAX "${NUTCH_OPTS[@]}" -classpath "$CLASSPATH")
else
# check that hadoop can be found on the path
if [ $(which hadoop | wc -l ) -eq 0 ]; then
@@ -273,5 +281,5 @@ else
fi
# run it
-exec $EXEC_CALL $CLASS "$@"
+exec "${EXEC_CALL[@]}" $CLASS "$@"