You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2014/07/05 23:13:20 UTC

svn commit: r1608135 - in /nutch: branches/2.x/CHANGES.txt branches/2.x/src/bin/crawl branches/2.x/src/bin/nutch trunk/CHANGES.txt trunk/src/bin/crawl trunk/src/bin/nutch

Author: snagel
Date: Sat Jul  5 21:13:19 2014
New Revision: 1608135

URL: http://svn.apache.org/r1608135
Log:
NUTCH-1566 bin/nutch to allow whitespace in paths

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/bin/crawl
    nutch/branches/2.x/src/bin/nutch
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/bin/crawl
    nutch/trunk/src/bin/nutch

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Sat Jul  5 21:13:19 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel)
+
 * NUTCH-1605 MIME type detector recognizes xlsx as zip file (snagel)
 
 * NUTCH-385 Improve description of thread related configuration for Fetcher (jnioche,lufeng)

Modified: nutch/branches/2.x/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/crawl?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/crawl (original)
+++ nutch/branches/2.x/src/bin/crawl Sat Jul  5 21:13:19 2014
@@ -70,12 +70,12 @@ timeLimitFetch=180
 addDays=0
 #############################################
 
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
+bin="`dirname "$0"`"
+bin="`cd "$bin"; pwd`"
 
 # determines whether mode based on presence of job file
 mode=local
-if [ -f ${bin}/../*nutch*.job ]; then
+if [ -f "${bin}"/../*nutch*.job ]; then
     mode=distributed
 fi
 
@@ -92,8 +92,7 @@ if [ $mode = "distributed" ]; then
 fi
 
 # initial injection
-$bin/nutch inject $SEEDDIR -crawlId $CRAWL_ID
-
+"$bin/nutch" inject "$SEEDDIR" -crawlId "$CRAWL_ID"
 if [ $? -ne 0 ] 
   then exit $? 
 fi
@@ -114,14 +113,14 @@ do
   batchId=`date +%s`-$RANDOM
 
   echo "Generating a new fetchlist"
-  $bin/nutch generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId $CRAWL_ID -batchId $batchId
+  "$bin/nutch" generate $commonOptions -topN $sizeFetchlist -noNorm -noFilter -adddays $addDays -crawlId "$CRAWL_ID" -batchId $batchId
   
   if [ $? -ne 0 ] 
   then exit $? 
   fi
 
   echo "Fetching : "
-  $bin/nutch fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId $CRAWL_ID -threads 50
+  "$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $batchId -crawlId "$CRAWL_ID" -threads 50
 
   if [ $? -ne 0 ] 
   then exit $? 
@@ -132,7 +131,7 @@ do
   # enable the skipping of records for the parsing so that a dodgy document 
   # so that it does not fail the full task
   skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
-  $bin/nutch parse $commonOptions $skipRecordsOptions $batchId -crawlId $CRAWL_ID
+  "$bin/nutch" parse $commonOptions $skipRecordsOptions $batchId -crawlId "$CRAWL_ID"
 
   if [ $? -ne 0 ] 
   then exit $? 
@@ -140,21 +139,21 @@ do
 
   # updatedb with this batch
   echo "CrawlDB update for $CRAWL_ID"
-  $bin/nutch updatedb $commonOptions $batchId -crawlId $CRAWL_ID
+  "$bin/nutch" updatedb $commonOptions $batchId -crawlId "$CRAWL_ID"
 
   if [ $? -ne 0 ] 
   then exit $? 
   fi
 
   echo "Indexing $CRAWL_ID on SOLR index -> $SOLRURL"
-  $bin/nutch index $commonOptions -D solr.server.url=$SOLRURL -all -crawlId $CRAWL_ID
+  "$bin/nutch" index $commonOptions -D solr.server.url=$SOLRURL -all -crawlId "$CRAWL_ID"
   
   if [ $? -ne 0 ] 
    then exit $? 
   fi
 
   echo "SOLR dedup -> $SOLRURL"
-  $bin/nutch solrdedup $commonOptions $SOLRURL
+  "$bin/nutch" solrdedup $commonOptions $SOLRURL
   
   if [ $? -ne 0 ] 
    then exit $? 

Modified: nutch/branches/2.x/src/bin/nutch
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/bin/nutch?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/branches/2.x/src/bin/nutch (original)
+++ nutch/branches/2.x/src/bin/nutch Sat Jul  5 21:13:19 2014
@@ -25,6 +25,14 @@
 #                   Default is 1000.
 #
 #   NUTCH_OPTS      Extra Java runtime options.
+#                   Multiple options must be separated by white space.
+#
+#   NUTCH_LOG_DIR   Log directory (default: $NUTCH_HOME/logs)
+#
+#   NUTCH_LOGFILE   Log file (default: hadoop.log)
+#
+#   NUTCH_CONF_DIR  Path(s) to configuration files (default: $NUTCH_HOME/conf).
+#                   Multiple paths must be separated by a colon ':'.
 #
 cygwin=false
 case "`uname`" in
@@ -78,13 +86,13 @@ COMMAND=$1
 shift
 
 # some directories
-THIS_DIR=`dirname "$THIS"`
-NUTCH_HOME=`cd "$THIS_DIR/.." ; pwd`
+THIS_DIR="`dirname "$THIS"`"
+NUTCH_HOME="`cd "$THIS_DIR/.." ; pwd`"
 
 # some Java parameters
 if [ "$NUTCH_JAVA_HOME" != "" ]; then
   #echo "run java in $NUTCH_JAVA_HOME"
-  JAVA_HOME=$NUTCH_JAVA_HOME
+  JAVA_HOME="$NUTCH_JAVA_HOME"
 fi
   
 if [ "$JAVA_HOME" = "" ]; then
@@ -94,21 +102,20 @@ fi
 
 
 # NUTCH_JOB 
-if [ -f ${NUTCH_HOME}/*nutch*.job ]; then
-    local=false
-  for f in $NUTCH_HOME/*nutch*.job; do
-    NUTCH_JOB=$f;
+if [ -f "${NUTCH_HOME}"/*nutch*.job ]; then
+  local=false
+  for f in "$NUTCH_HOME"/*nutch*.job; do
+    NUTCH_JOB="$f";
   done
+  # cygwin path translation
+  if $cygwin; then
+    NUTCH_JOB="`cygpath -p -w "$NUTCH_JOB"`"
+  fi
 else
   local=true
 fi
 
-# cygwin path translation
-if $cygwin; then
-  NUTCH_JOB=`cygpath -p -w "$NUTCH_JOB"`
-fi
-
-JAVA=$JAVA_HOME/bin/java
+JAVA="$JAVA_HOME/bin/java"
 JAVA_HEAP_MAX=-Xmx1000m 
 
 # check envvars which might override default args
@@ -119,46 +126,47 @@ if [ "$NUTCH_HEAPSIZE" != "" ]; then
 fi
 
 # CLASSPATH initially contains $NUTCH_CONF_DIR, or defaults to $NUTCH_HOME/conf
-CLASSPATH=${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}
-CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+CLASSPATH="${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}"
+CLASSPATH="${CLASSPATH}:$JAVA_HOME/lib/tools.jar"
 
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
 
 # add libs to CLASSPATH
 if $local; then
-  for f in $NUTCH_HOME/lib/*.jar; do
-   CLASSPATH=${CLASSPATH}:$f;
+  for f in "$NUTCH_HOME"/lib/*.jar; do
+   CLASSPATH="${CLASSPATH}:$f";
   done
   # local runtime
   # add plugins to classpath
   if [ -d "$NUTCH_HOME/plugins" ]; then
-     CLASSPATH=${NUTCH_HOME}:${CLASSPATH}
+     CLASSPATH="${NUTCH_HOME}:${CLASSPATH}"
   fi
 fi
 
 # cygwin path translation
 if $cygwin; then
-  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+  CLASSPATH="`cygpath -p -w "$CLASSPATH"`"
 fi
 
 # setup 'java.library.path' for native-hadoop code if necessary
 # used only in local mode 
 JAVA_LIBRARY_PATH=''
 if [ -d "${NUTCH_HOME}/lib/native" ]; then
-  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
-  
+
+  JAVA_PLATFORM=`"${JAVA}" -classpath "$CLASSPATH" org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
+
   if [ -d "${NUTCH_HOME}/lib/native" ]; then
     if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+      JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
     else
-      JAVA_LIBRARY_PATH=${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+      JAVA_LIBRARY_PATH="${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
     fi
   fi
 fi
 
 if [ $cygwin = true -a "X${JAVA_LIBRARY_PATH}" != "X" ]; then
-  JAVA_LIBRARY_PATH=`cygpath -p -w "$JAVA_LIBRARY_PATH"`
+  JAVA_LIBRARY_PATH="`cygpath -p -w "$JAVA_LIBRARY_PATH"`"
 fi
 
 # restore ordinary behaviour
@@ -174,14 +182,14 @@ fi
 
 #Fix log path under cygwin
 if $cygwin; then
-  NUTCH_LOG_DIR=`cygpath -p -w "$NUTCH_LOG_DIR"`
+  NUTCH_LOG_DIR="`cygpath -p -w "$NUTCH_LOG_DIR"`"
 fi
 
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.dir=$NUTCH_LOG_DIR"
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.file=$NUTCH_LOGFILE"
+NUTCH_OPTS=($NUTCH_OPTS -Dhadoop.log.dir="$NUTCH_LOG_DIR")
+NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Dhadoop.log.file="$NUTCH_LOGFILE")
 
 if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-  NUTCH_OPTS="$NUTCH_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+  NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Djava.library.path="$JAVA_LIBRARY_PATH")
 fi
 
 # figure out which class to run
@@ -229,7 +237,7 @@ CLASS=org.apache.nutch.plugin.PluginRepo
 elif [ "$COMMAND" = "nutchserver" ] ; then
 CLASS=org.apache.nutch.api.NutchServer
 elif [ "$COMMAND" = "junit" ] ; then
-  CLASSPATH=$CLASSPATH:$NUTCH_HOME/test/classes/
+  CLASSPATH="$CLASSPATH:$NUTCH_HOME/test/classes/"
   CLASS=junit.textui.TestRunner
 else
 CLASS=$COMMAND
@@ -238,8 +246,8 @@ fi
 
 if $local; then
  # fix for the external Xerces lib issue with SAXParserFactory
- NUTCH_OPTS="-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl $NUTCH_OPTS"
- EXEC_CALL="$JAVA $JAVA_HEAP_MAX $NUTCH_OPTS -classpath $CLASSPATH"
+ NUTCH_OPTS=(-Djavax.xml.parsers.DocumentBuilderFactory=com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl "${NUTCH_OPTS[@]}")
+ EXEC_CALL=("$JAVA" $JAVA_HEAP_MAX "${NUTCH_OPTS[@]}" -classpath "$CLASSPATH")
 else
  # check that hadoop can be found on the path
  if [ $(which hadoop | wc -l ) -eq 0 ]; then
@@ -247,9 +255,9 @@ else
     exit -1;
  fi
  # distributed mode
- EXEC_CALL="hadoop jar $NUTCH_JOB"
+ EXEC_CALL=(hadoop jar "$NUTCH_JOB")
 fi
 
 # run it
-exec $EXEC_CALL $CLASS "$@"
+exec "${EXEC_CALL[@]}" $CLASS "$@"
 

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Jul  5 21:13:19 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel)
+
 * NUTCH-1605 MIME type detector recognizes xlsx as zip file (snagel)
 
 * NUTCH-1802 Move TestbedProxy to test environment (jnioche)

Modified: nutch/trunk/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Sat Jul  5 21:13:19 2014
@@ -69,12 +69,12 @@ numThreads=50
 
 #############################################
 
-bin=`dirname "$0"`
-bin=`cd "$bin"; pwd`
+bin="`dirname "$0"`"
+bin="`cd "$bin"; pwd`"
 
 # determines whether mode based on presence of job file
 mode=local
-if [ -f ${bin}/../*nutch*.job ]; then
+if [ -f "${bin}"/../*nutch*.job ]; then
     mode=distributed
 fi
 
@@ -91,7 +91,7 @@ if [ $mode = "distributed" ]; then
 fi
 
 # initial injection
-$bin/nutch inject $CRAWL_PATH/crawldb $SEEDDIR
+"$bin/nutch" inject "$CRAWL_PATH"/crawldb "$SEEDDIR"
 
 if [ $? -ne 0 ] 
   then exit $? 
@@ -110,7 +110,7 @@ do
   echo `date` ": Iteration $a of $LIMIT"
 
   echo "Generating a new segment"
-  $bin/nutch generate $commonOptions $CRAWL_PATH/crawldb $CRAWL_PATH/segments -topN $sizeFetchlist -numFetchers $numSlaves -noFilter
+  "$bin/nutch" generate $commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $sizeFetchlist -numFetchers $numSlaves -noFilter
   
   if [ $? -ne 0 ] 
   then exit $? 
@@ -121,16 +121,16 @@ do
   # or use ls
 
   if [ $mode = "local" ]; then
-   SEGMENT=`ls $CRAWL_PATH/segments/ | sort -n | tail -n 1`
+   SEGMENT=`ls "$CRAWL_PATH"/segments/ | sort -n | tail -n 1`
   else
-   SEGMENT=`hadoop fs -ls $CRAWL_PATH/segments/ | grep segments |  sed -e "s/\//\\n/g" | egrep 20[0-9]+ | sort -n | tail -n 1`
+   SEGMENT=`hadoop fs -ls "$CRAWL_PATH"/segments/ | grep segments |  sed -e "s/\//\\n/g" | egrep 20[0-9]+ | sort -n | tail -n 1`
   fi
   
   echo "Operating on segment : $SEGMENT"
 
   # fetching the segment
   echo "Fetching : $SEGMENT"
-  $bin/nutch fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch $CRAWL_PATH/segments/$SEGMENT -noParsing -threads $numThreads
+  "$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch "$CRAWL_PATH"/segments/$SEGMENT -noParsing -threads $numThreads
 
   if [ $? -ne 0 ] 
   then exit $? 
@@ -141,7 +141,7 @@ do
   # enable the skipping of records for the parsing so that a dodgy document 
   # so that it does not fail the full task
   skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
-  $bin/nutch parse $commonOptions $skipRecordsOptions $CRAWL_PATH/segments/$SEGMENT
+  "$bin/nutch" parse $commonOptions $skipRecordsOptions "$CRAWL_PATH"/segments/$SEGMENT
 
   if [ $? -ne 0 ] 
   then exit $? 
@@ -149,7 +149,7 @@ do
 
   # updatedb with this segment
   echo "CrawlDB update"
-  $bin/nutch updatedb $commonOptions $CRAWL_PATH/crawldb  $CRAWL_PATH/segments/$SEGMENT
+  "$bin/nutch" updatedb $commonOptions "$CRAWL_PATH"/crawldb  "$CRAWL_PATH"/segments/$SEGMENT
 
   if [ $? -ne 0 ] 
   then exit $? 
@@ -158,7 +158,7 @@ do
 # note that the link inversion - indexing routine can be done within the main loop 
 # on a per segment basis
   echo "Link inversion"
-  $bin/nutch invertlinks $CRAWL_PATH/linkdb $CRAWL_PATH/segments/$SEGMENT
+  "$bin/nutch" invertlinks "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
 
   if [ $? -ne 0 ] 
   then exit $? 
@@ -172,14 +172,14 @@ do
   fi
 
   echo "Indexing $SEGMENT on SOLR index -> $SOLRURL"
-  $bin/nutch index -D solr.server.url=$SOLRURL $CRAWL_PATH/crawldb -linkdb $CRAWL_PATH/linkdb $CRAWL_PATH/segments/$SEGMENT
+  "$bin/nutch" index -D solr.server.url=$SOLRURL "$CRAWL_PATH"/crawldb -linkdb "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
   
   if [ $? -ne 0 ] 
    then exit $? 
   fi
 
   echo "Cleanup on SOLR index -> $SOLRURL"
-  $bin/nutch clean -D solr.server.url=$SOLRURL $CRAWL_PATH/crawldb
+  "$bin/nutch" clean -D solr.server.url=$SOLRURL "$CRAWL_PATH"/crawldb
   
   if [ $? -ne 0 ] 
    then exit $? 

Modified: nutch/trunk/src/bin/nutch
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/nutch?rev=1608135&r1=1608134&r2=1608135&view=diff
==============================================================================
--- nutch/trunk/src/bin/nutch (original)
+++ nutch/trunk/src/bin/nutch Sat Jul  5 21:13:19 2014
@@ -25,6 +25,14 @@
 #                   Default is 1000.
 #
 #   NUTCH_OPTS      Extra Java runtime options.
+#                   Multiple options must be separated by white space.
+#
+#   NUTCH_LOG_DIR   Log directory (default: $NUTCH_HOME/logs)
+#
+#   NUTCH_LOGFILE   Log file (default: hadoop.log)
+#
+#   NUTCH_CONF_DIR  Path(s) to configuration files (default: $NUTCH_HOME/conf).
+#                   Multiple paths must be separated by a colon ':'.
 #
 cygwin=false
 case "`uname`" in
@@ -86,13 +94,13 @@ COMMAND=$1
 shift
 
 # some directories
-THIS_DIR=`dirname "$THIS"`
-NUTCH_HOME=`cd "$THIS_DIR/.." ; pwd`
+THIS_DIR="`dirname "$THIS"`"
+NUTCH_HOME="`cd "$THIS_DIR/.." ; pwd`"
 
 # some Java parameters
 if [ "$NUTCH_JAVA_HOME" != "" ]; then
   #echo "run java in $NUTCH_JAVA_HOME"
-  JAVA_HOME=$NUTCH_JAVA_HOME
+  JAVA_HOME="$NUTCH_JAVA_HOME"
 fi
   
 if [ "$JAVA_HOME" = "" ]; then
@@ -103,19 +111,18 @@ fi
 local=true
 
 # NUTCH_JOB 
-if [ -f ${NUTCH_HOME}/*nutch*.job ]; then
-    local=false
-  for f in $NUTCH_HOME/*nutch*.job; do
-    NUTCH_JOB=$f;
+if [ -f "${NUTCH_HOME}"/*nutch*.job ]; then
+  local=false
+  for f in "$NUTCH_HOME"/*nutch*.job; do
+    NUTCH_JOB="$f"
   done
+  # cygwin path translation
+  if $cygwin; then
+	NUTCH_JOB="`cygpath -p -w "$NUTCH_JOB"`"
+  fi
 fi
 
-# cygwin path translation
-if $cygwin; then
-  NUTCH_JOB=`cygpath -p -w "$NUTCH_JOB"`
-fi
-
-JAVA=$JAVA_HOME/bin/java
+JAVA="$JAVA_HOME/bin/java"
 JAVA_HEAP_MAX=-Xmx1000m 
 
 # check envvars which might override default args
@@ -126,46 +133,47 @@ if [ "$NUTCH_HEAPSIZE" != "" ]; then
 fi
 
 # CLASSPATH initially contains $NUTCH_CONF_DIR, or defaults to $NUTCH_HOME/conf
-CLASSPATH=${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}
-CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+CLASSPATH="${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}"
+CLASSPATH="${CLASSPATH}:$JAVA_HOME/lib/tools.jar"
 
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
 
 # add libs to CLASSPATH
 if $local; then
-  for f in $NUTCH_HOME/lib/*.jar; do
-   CLASSPATH=${CLASSPATH}:$f;
+  for f in "$NUTCH_HOME"/lib/*.jar; do
+   CLASSPATH="${CLASSPATH}:$f";
   done
   # local runtime
   # add plugins to classpath
   if [ -d "$NUTCH_HOME/plugins" ]; then
-     CLASSPATH=${NUTCH_HOME}:${CLASSPATH}
+     CLASSPATH="${NUTCH_HOME}:${CLASSPATH}"
   fi
 fi
 
 # cygwin path translation
 if $cygwin; then
-  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+  CLASSPATH="`cygpath -p -w "$CLASSPATH"`"
 fi
 
 # setup 'java.library.path' for native-hadoop code if necessary
 # used only in local mode 
 JAVA_LIBRARY_PATH=''
 if [ -d "${NUTCH_HOME}/lib/native" ]; then
-  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
-  
+
+  JAVA_PLATFORM=`"${JAVA}" -classpath "$CLASSPATH" org.apache.hadoop.util.PlatformName | sed -e 's/ /_/g'`
+
   if [ -d "${NUTCH_HOME}/lib/native" ]; then
     if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+      JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH}:${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
     else
-      JAVA_LIBRARY_PATH=${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}
+      JAVA_LIBRARY_PATH="${NUTCH_HOME}/lib/native/${JAVA_PLATFORM}"
     fi
   fi
 fi
 
 if [ $cygwin = true -a "X${JAVA_LIBRARY_PATH}" != "X" ]; then
-  JAVA_LIBRARY_PATH=`cygpath -p -w "$JAVA_LIBRARY_PATH"`
+  JAVA_LIBRARY_PATH="`cygpath -p -w "$JAVA_LIBRARY_PATH"`"
 fi
 
 # restore ordinary behaviour
@@ -181,14 +189,14 @@ fi
 
 #Fix log path under cygwin
 if $cygwin; then
-  NUTCH_LOG_DIR=`cygpath -p -w "$NUTCH_LOG_DIR"`
+  NUTCH_LOG_DIR="`cygpath -p -w "$NUTCH_LOG_DIR"`"
 fi
 
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.dir=$NUTCH_LOG_DIR"
-NUTCH_OPTS="$NUTCH_OPTS -Dhadoop.log.file=$NUTCH_LOGFILE"
+NUTCH_OPTS=($NUTCH_OPTS -Dhadoop.log.dir="$NUTCH_LOG_DIR")
+NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Dhadoop.log.file="$NUTCH_LOGFILE")
 
 if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
-  NUTCH_OPTS="$NUTCH_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+  NUTCH_OPTS=("${NUTCH_OPTS[@]}" -Djava.library.path="$JAVA_LIBRARY_PATH")
 fi
 
 # figure out which class to run
@@ -253,17 +261,17 @@ elif [ "$COMMAND" = "nodedumper" ] ; the
 elif [ "$COMMAND" = "plugin" ] ; then
   CLASS=org.apache.nutch.plugin.PluginRepository
 elif [ "$COMMAND" = "junit" ] ; then
-  CLASSPATH=$CLASSPATH:$NUTCH_HOME/test/classes/
+  CLASSPATH="$CLASSPATH:$NUTCH_HOME/test/classes/"
   CLASS=junit.textui.TestRunner
 else
   CLASS=$COMMAND
 fi
 
 # distributed mode
-EXEC_CALL="hadoop jar $NUTCH_JOB"
+EXEC_CALL=(hadoop jar "$NUTCH_JOB")
 
 if $local; then
- EXEC_CALL="$JAVA $JAVA_HEAP_MAX $NUTCH_OPTS -classpath $CLASSPATH"
+ EXEC_CALL=("$JAVA" $JAVA_HEAP_MAX "${NUTCH_OPTS[@]}" -classpath "$CLASSPATH")
 else
  # check that hadoop can be found on the path
  if [ $(which hadoop | wc -l ) -eq 0 ]; then
@@ -273,5 +281,5 @@ else
 fi
 
 # run it
-exec $EXEC_CALL $CLASS "$@"
+exec "${EXEC_CALL[@]}" $CLASS "$@"