Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/02/03 20:45:51 UTC

svn commit: r374733 [1/4] - in /lucene/hadoop/trunk: ./ bin/ conf/ lib/ lib/jetty-ext/ src/java/ src/java/org/ src/java/org/apache/ src/java/org/apache/hadoop/ src/java/org/apache/hadoop/conf/ src/java/org/apache/hadoop/dfs/ src/java/org/apache/hadoop/...

Author: cutting
Date: Fri Feb  3 11:45:32 2006
New Revision: 374733

URL: http://svn.apache.org/viewcvs?rev=374733&view=rev
Log:
Initial commit of code copied from Nutch.

Added:
    lucene/hadoop/trunk/bin/
    lucene/hadoop/trunk/bin/hadoop
      - copied, changed from r374710, lucene/nutch/trunk/bin/nutch
    lucene/hadoop/trunk/bin/hadoop-daemon.sh
      - copied, changed from r374710, lucene/nutch/trunk/bin/nutch-daemon.sh
    lucene/hadoop/trunk/bin/hadoop-daemons.sh
      - copied, changed from r374710, lucene/nutch/trunk/bin/nutch-daemons.sh
    lucene/hadoop/trunk/bin/slaves.sh
      - copied, changed from r374710, lucene/nutch/trunk/bin/slaves.sh
    lucene/hadoop/trunk/bin/start-all.sh
      - copied, changed from r374710, lucene/nutch/trunk/bin/start-all.sh
    lucene/hadoop/trunk/bin/stop-all.sh
      - copied, changed from r374710, lucene/nutch/trunk/bin/stop-all.sh
    lucene/hadoop/trunk/build.xml
      - copied, changed from r374710, lucene/nutch/trunk/build.xml
    lucene/hadoop/trunk/conf/
    lucene/hadoop/trunk/conf/hadoop-default.xml
      - copied, changed from r374710, lucene/nutch/trunk/conf/nutch-default.xml
    lucene/hadoop/trunk/conf/hadoop-site.xml.template
      - copied unchanged from r374710, lucene/nutch/trunk/conf/nutch-site.xml.template
    lucene/hadoop/trunk/conf/mapred-default.xml.template
      - copied unchanged from r374710, lucene/nutch/trunk/conf/mapred-default.xml.template
    lucene/hadoop/trunk/lib/
    lucene/hadoop/trunk/lib/jetty-5.1.4.LICENSE.txt
      - copied unchanged from r374710, lucene/nutch/trunk/lib/jetty-5.1.4.LICENSE.txt
    lucene/hadoop/trunk/lib/jetty-5.1.4.jar
      - copied unchanged from r374710, lucene/nutch/trunk/lib/jetty-5.1.4.jar
    lucene/hadoop/trunk/lib/jetty-ext/
      - copied from r374710, lucene/nutch/trunk/lib/jetty-ext/
    lucene/hadoop/trunk/lib/junit-3.8.1.LICENSE.txt
      - copied unchanged from r374710, lucene/nutch/trunk/lib/junit-3.8.1.LICENSE.txt
    lucene/hadoop/trunk/lib/junit-3.8.1.jar
      - copied unchanged from r374710, lucene/nutch/trunk/lib/junit-3.8.1.jar
    lucene/hadoop/trunk/lib/servlet-api.jar
      - copied unchanged from r374710, lucene/nutch/trunk/lib/servlet-api.jar
    lucene/hadoop/trunk/src/java/
    lucene/hadoop/trunk/src/java/org/
    lucene/hadoop/trunk/src/java/org/apache/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java
      - copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java
      - copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java
      - copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/
      - copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/
      - copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/fs/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/
      - copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/io/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/
      - copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/ipc/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/
      - copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/mapred/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/Daemon.java
      - copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/LogFormatter.java
      - copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/LogFormatter.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/util/Progress.java
      - copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/Progress.java
Modified:
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/LocatedBlock.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NDFSClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NDFSFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NDFSFileInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSError.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NDFSFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NDFSShell.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSDataInputStream.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSDataOutputStream.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSInputStream.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSOutputStream.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NutchFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/Seekable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/ArrayFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/ArrayWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/BooleanWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/BytesWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/CompressedWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/DataInputBuffer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/DataOutputBuffer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/FloatWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/IntWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/LongWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/ObjectWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SetFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/TwoDArrayWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/UTF8.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/VersionMismatchException.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/VersionedWritable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Writable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableComparable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableComparator.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableName.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableUtils.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/RPC.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Server.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/CombiningCollector.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileSplit.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConfigurable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobProfile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobStatus.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobSubmissionProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTrackerInfoServer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MRConstants.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapFileOutputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapOutputFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapOutputLocation.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapOutputProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunnable.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTaskRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Mapper.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/OutputCollector.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/OutputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Partitioner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordWriter.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTaskRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Reducer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Reporter.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RunningJob.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileInputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileOutputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Task.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/HashPartitioner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/IdentityMapper.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/IdentityReducer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/InverseMapper.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/LongSumReducer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java

Copied: lucene/hadoop/trunk/bin/hadoop (from r374710, lucene/nutch/trunk/bin/nutch)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop?p2=lucene/hadoop/trunk/bin/hadoop&p1=lucene/nutch/trunk/bin/nutch&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch (original)
+++ lucene/hadoop/trunk/bin/hadoop Fri Feb  3 11:45:32 2006
@@ -1,15 +1,15 @@
 #!/bin/bash
 # 
-# The Nutch command script
+# The Hadoop command script
 #
 # Environment Variables
 #
-#   NUTCH_JAVA_HOME The java implementation to use.  Overrides JAVA_HOME.
+#   JAVA_HOME The java implementation to use.  Required.
 #
-#   NUTCH_HEAPSIZE  The maximum amount of heap to use, in MB. 
+#   HADOOP_HEAPSIZE  The maximum amount of heap to use, in MB. 
 #                   Default is 1000.
 #
-#   NUTCH_OPTS      Extra Java runtime options.
+#   HADOOP_OPTS      Extra Java runtime options.
 #
 
 # resolve links - $0 may be a softlink
@@ -26,26 +26,11 @@
 
 # if no args specified, show usage
 if [ $# = 0 ]; then
-  echo "Usage: nutch COMMAND"
+  echo "Usage: hadoop COMMAND"
   echo "where COMMAND is one of:"
-  echo "  crawl             one-step crawler for intranets"
-  echo "  readdb            read / dump crawl db"
-  echo "  readlinkdb        read / dump link db"
-  echo "  inject            inject new urls into the database"
-  echo "  generate          generate new segments to fetch"
-  echo "  fetch             fetch a segment's pages"
-  echo "  parse             parse a segment's pages"
-  echo "  segread           read / dump segment data"
-  echo "  updatedb          update crawl db from segments after fetching"
-  echo "  invertlinks       create a linkdb from parsed segments"
-  echo "  index             run the indexer on parsed segments and linkdb"
-  echo "  merge             merge several segment indexes"
-  echo "  dedup             remove duplicates from a set of segment indexes"
-  echo "  plugin            load a plugin and run one of its classes main()"
-  echo "  server            run a search server"
-  echo "  namenode          run the NDFS namenode"
-  echo "  datanode          run an NDFS datanode"
-  echo "  ndfs              run an NDFS admin client"
+  echo "  namenode          run the DFS namenode"
+  echo "  datanode          run an DFS datanode"
+  echo "  dfs              run an DFS admin client"
   echo "  jobtracker        run the MapReduce job Tracker node" 
   echo "  tasktracker       run a MapReduce task Tracker node" 
   echo "  job               manipulate MapReduce jobs" 
@@ -61,12 +46,12 @@
 
 # some directories
 THIS_DIR=`dirname "$THIS"`
-NUTCH_HOME=`cd "$THIS_DIR/.." ; pwd`
+HADOOP_HOME=`cd "$THIS_DIR/.." ; pwd`
 
 # some Java parameters
-if [ "$NUTCH_JAVA_HOME" != "" ]; then
-  #echo "run java in $NUTCH_JAVA_HOME"
-  JAVA_HOME=$NUTCH_JAVA_HOME
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
 fi
   
 if [ "$JAVA_HOME" = "" ]; then
@@ -78,46 +63,46 @@
 JAVA_HEAP_MAX=-Xmx1000m 
 
 # check envvars which might override default args
-if [ "$NUTCH_HEAPSIZE" != "" ]; then
-  #echo "run with heapsize $NUTCH_HEAPSIZE"
-  JAVA_HEAP_MAX="-Xmx""$NUTCH_HEAPSIZE""m"
+if [ "$HADOOP_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $HADOOP_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
   #echo $JAVA_HEAP_MAX
 fi
 
-# CLASSPATH initially contains $NUTCH_CONF_DIR, or defaults to $NUTCH_HOME/conf
-CLASSPATH=${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}
+# CLASSPATH initially contains $HADOOP_CONF_DIR, or defaults to $HADOOP_HOME/conf
+CLASSPATH=${HADOOP_CONF_DIR:=$HADOOP_HOME/conf}
 CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
 
-# for developers, add Nutch classes to CLASSPATH
-if [ -d "$NUTCH_HOME/build/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build/classes
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
 fi
-if [ -d "$NUTCH_HOME/build/plugins" ]; then
-  CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build
+if [ -d "$HADOOP_HOME/build/plugins" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
 fi
-if [ -d "$NUTCH_HOME/build/test/classes" ]; then
-  CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build/test/classes
+if [ -d "$HADOOP_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
 fi
 
 # so that filenames w/ spaces are handled correctly in loops below
 IFS=
 
-# for releases, add Nutch jar to CLASSPATH
-for f in $NUTCH_HOME/nutch-*.jar; do
+# for releases, add Hadoop jar to CLASSPATH
+for f in $HADOOP_HOME/hadoop-*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
 # add plugins to classpath
-if [ -d "$NUTCH_HOME/plugins" ]; then
-  CLASSPATH=${CLASSPATH}:$NUTCH_HOME
+if [ -d "$HADOOP_HOME/plugins" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME
 fi
 
 # add libs to CLASSPATH
-for f in $NUTCH_HOME/lib/*.jar; do
+for f in $HADOOP_HOME/lib/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
-for f in $NUTCH_HOME/lib/jetty-ext/*.jar; do
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
   CLASSPATH=${CLASSPATH}:$f;
 done
 
@@ -125,48 +110,18 @@
 unset IFS
 
 # figure out which class to run
-if [ "$COMMAND" = "crawl" ] ; then
-  CLASS=org.apache.nutch.crawl.Crawl
-elif [ "$COMMAND" = "inject" ] ; then
-  CLASS=org.apache.nutch.crawl.Injector
-elif [ "$COMMAND" = "generate" ] ; then
-  CLASS=org.apache.nutch.crawl.Generator
-elif [ "$COMMAND" = "fetch" ] ; then
-  CLASS=org.apache.nutch.fetcher.Fetcher
-elif [ "$COMMAND" = "parse" ] ; then
-  CLASS=org.apache.nutch.parse.ParseSegment
-elif [ "$COMMAND" = "readdb" ] ; then
-  CLASS=org.apache.nutch.crawl.CrawlDbReader
-elif [ "$COMMAND" = "readlinkdb" ] ; then
-  CLASS=org.apache.nutch.crawl.LinkDbReader
-elif [ "$COMMAND" = "segread" ] ; then
-  CLASS=org.apache.nutch.segment.SegmentReader
-elif [ "$COMMAND" = "updatedb" ] ; then
-  CLASS=org.apache.nutch.crawl.CrawlDb
-elif [ "$COMMAND" = "invertlinks" ] ; then
-  CLASS=org.apache.nutch.crawl.LinkDb
-elif [ "$COMMAND" = "index" ] ; then
-  CLASS=org.apache.nutch.indexer.Indexer
-elif [ "$COMMAND" = "dedup" ] ; then
-  CLASS=org.apache.nutch.indexer.DeleteDuplicates
-elif [ "$COMMAND" = "merge" ] ; then
-  CLASS=org.apache.nutch.indexer.IndexMerger
-elif [ "$COMMAND" = "plugin" ] ; then
-  CLASS=org.apache.nutch.plugin.PluginRepository
-elif [ "$COMMAND" = "server" ] ; then
-  CLASS='org.apache.nutch.searcher.DistributedSearch$Server'
-elif [ "$COMMAND" = "namenode" ] ; then
-  CLASS='org.apache.nutch.ndfs.NameNode'
+if [ "$COMMAND" = "namenode" ] ; then
+  CLASS='org.apache.hadoop.dfs.NameNode'
 elif [ "$COMMAND" = "datanode" ] ; then
-  CLASS='org.apache.nutch.ndfs.DataNode'
-elif [ "$COMMAND" = "ndfs" ] ; then
-  CLASS=org.apache.nutch.fs.NDFSShell
+  CLASS='org.apache.hadoop.dfs.DataNode'
+elif [ "$COMMAND" = "dfs" ] ; then
+  CLASS=org.apache.hadoop.fs.DFSShell
 elif [ "$COMMAND" = "jobtracker" ] ; then
-  CLASS=org.apache.nutch.mapred.JobTracker
+  CLASS=org.apache.hadoop.mapred.JobTracker
 elif [ "$COMMAND" = "tasktracker" ] ; then
-  CLASS=org.apache.nutch.mapred.TaskTracker
+  CLASS=org.apache.hadoop.mapred.TaskTracker
 elif [ "$COMMAND" = "job" ] ; then
-  CLASS=org.apache.nutch.mapred.JobClient
+  CLASS=org.apache.hadoop.mapred.JobClient
 else
   CLASS=$COMMAND
 fi
@@ -177,5 +132,4 @@
 fi
 
 # run it
-exec "$JAVA" $JAVA_HEAP_MAX $NUTCH_OPTS -classpath "$CLASSPATH" $CLASS "$@"
-
+exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

Copied: lucene/hadoop/trunk/bin/hadoop-daemon.sh (from r374710, lucene/nutch/trunk/bin/nutch-daemon.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop-daemon.sh?p2=lucene/hadoop/trunk/bin/hadoop-daemon.sh&p1=lucene/nutch/trunk/bin/nutch-daemon.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch-daemon.sh (original)
+++ lucene/hadoop/trunk/bin/hadoop-daemon.sh Fri Feb  3 11:45:32 2006
@@ -1,16 +1,16 @@
 #!/bin/bash
 # 
-# Runs a Nutch command as a daemon.
+# Runs a Hadoop command as a daemon.
 #
 # Environment Variables
 #
-#   NUTCH_LOG_DIR   Where log files are stored.  PWD by default.
-#   NUTCH_MASTER    host:path where nutch code should be rsync'd from
-#   NUTCH_PID_DIR   The pid files are stored. /tmp by default.
-#   NUTCH_IDENT_STRING   A string representing this instance of nutch. $USER by default
+#   HADOOP_LOG_DIR   Where log files are stored.  PWD by default.
+#   HADOOP_MASTER    host:path where hadoop code should be rsync'd from
+#   HADOOP_PID_DIR   Where pid files are stored.  /tmp by default.
+#   HADOOP_IDENT_STRING   A string representing this instance of hadoop. $USER by default
 ##
 
-usage="Usage: nutch-daemon [start|stop] [nutch-command] [args...]"
+usage="Usage: hadoop-daemon [start|stop] [hadoop-command] [args...]"
 
 # if no args specified, show usage
 if [ $# -le 1 ]; then
@@ -37,21 +37,21 @@
 done
 
 # get log directory
-if [ "$NUTCH_LOG_DIR" = "" ]; then
-  NUTCH_LOG_DIR=$PWD
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  HADOOP_LOG_DIR=$PWD
 fi
 
-if [ "$NUTCH_PID_DIR" = "" ]; then
-  NUTCH_PID_DIR=/tmp
+if [ "$HADOOP_PID_DIR" = "" ]; then
+  HADOOP_PID_DIR=/tmp
 fi
 
-if [ "$NUTCH_IDENT_STRING" = "" ]; then
-  NUTCH_IDENT_STRING=$USER
+if [ "$HADOOP_IDENT_STRING" = "" ]; then
+  HADOOP_IDENT_STRING=$USER
 fi
 
 # some variables
-log=$NUTCH_LOG_DIR/nutch-$NUTCH_IDENT_STRING-$command-`hostname`.log
-pid=$NUTCH_PID_DIR/nutch-$NUTCH_IDENT_STRING-$command.pid
+log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-`hostname`.log
+pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
 
 case $startStop in
 
@@ -65,14 +65,14 @@
     fi
 
     root=`dirname $this`/..
-    if [ "$NUTCH_MASTER" != "" ]; then
-      echo rsync from $NUTCH_MASTER
-      rsync -a --delete --exclude=.svn $NUTCH_MASTER/ $root
+    if [ "$HADOOP_MASTER" != "" ]; then
+      echo rsync from $HADOOP_MASTER
+      rsync -a --delete --exclude=.svn $HADOOP_MASTER/ $root
     fi
 
     cd $root
     echo starting $command, logging to $log
-    nohup bin/nutch $command "$@" >& $log < /dev/null &
+    nohup bin/hadoop $command "$@" >& $log < /dev/null &
     echo $! > $pid
     sleep 1; head $log
     ;;
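
In short, "start" nohups bin/hadoop with output captured to a per-command log
and the pid saved for a later "stop"; setting HADOOP_MASTER makes each start
rsync the install tree from host:path first.  A sketch using the defaults
above (the log dir here is hypothetical; it falls back to $PWD):

    export HADOOP_LOG_DIR=/var/log/hadoop
    bin/hadoop-daemon.sh start datanode   # log: hadoop-$USER-datanode-`hostname`.log
    bin/hadoop-daemon.sh stop datanode    # uses the pid file written at start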

Copied: lucene/hadoop/trunk/bin/hadoop-daemons.sh (from r374710, lucene/nutch/trunk/bin/nutch-daemons.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop-daemons.sh?p2=lucene/hadoop/trunk/bin/hadoop-daemons.sh&p1=lucene/nutch/trunk/bin/nutch-daemons.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch-daemons.sh (original)
+++ lucene/hadoop/trunk/bin/hadoop-daemons.sh Fri Feb  3 11:45:32 2006
@@ -1,8 +1,8 @@
 #!/bin/bash
 # 
-# Run a Nutch command on all slave hosts.
+# Run a Hadoop command on all slave hosts.
 
-usage="Usage: nutch-daemons.sh [start|stop] command args..."
+usage="Usage: hadoop-daemons.sh [start|stop] command args..."
 
 # if no args specified, show usage
 if [ $# -le 1 ]; then
@@ -13,4 +13,4 @@
 bin=`dirname $0`
 bin=`cd $bin; pwd`
 
-exec $bin/slaves.sh $bin/nutch-daemon.sh "$@"
+exec $bin/slaves.sh $bin/hadoop-daemon.sh "$@"
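
So the plural script is pure composition: slaves.sh fans hadoop-daemon.sh out
across the slave hosts.  Note that the absolute $bin path is what travels over
ssh, so this assumes the same install location on every slave:

    bin/hadoop-daemons.sh start datanode   # hadoop-daemon.sh start datanode, on each slave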

Copied: lucene/hadoop/trunk/bin/slaves.sh (from r374710, lucene/nutch/trunk/bin/slaves.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/slaves.sh?p2=lucene/hadoop/trunk/bin/slaves.sh&p1=lucene/nutch/trunk/bin/slaves.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/slaves.sh (original)
+++ lucene/hadoop/trunk/bin/slaves.sh Fri Feb  3 11:45:32 2006
@@ -4,7 +4,7 @@
 #
 # Environment Variables
 #
-#   NUTCH_SLAVES    File naming remote hosts.  Default is ~/.slaves
+#   HADOOP_SLAVES    File naming remote hosts.  Default is ~/.slaves
 ##
 
 usage="Usage: slaves.sh command..."
@@ -15,11 +15,11 @@
   exit 1
 fi
 
-if [ "$NUTCH_SLAVES" = "" ]; then
-  export NUTCH_SLAVES=$HOME/.slaves
+if [ "$HADOOP_SLAVES" = "" ]; then
+  export HADOOP_SLAVES=$HOME/.slaves
 fi
 
-for slave in `cat $NUTCH_SLAVES`; do
+for slave in `cat $HADOOP_SLAVES`; do
  ssh -o ConnectTimeout=1 $slave "$@" \
    2>&1 | sed "s/^/$slave: /" &
 done
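
The loop sshes the command to every host in parallel (note the trailing &) and
tags each line of output with the host it came from.  For example, with
hypothetical host names:

    cat > ~/.slaves <<EOF
    node1.example.com
    node2.example.com
    EOF
    bin/slaves.sh uptime   # prints "node1.example.com: ...", "node2.example.com: ..."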

Copied: lucene/hadoop/trunk/bin/start-all.sh (from r374710, lucene/nutch/trunk/bin/start-all.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/start-all.sh?p2=lucene/hadoop/trunk/bin/start-all.sh&p1=lucene/nutch/trunk/bin/start-all.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/start-all.sh (original)
+++ lucene/hadoop/trunk/bin/start-all.sh Fri Feb  3 11:45:32 2006
@@ -1,11 +1,11 @@
 #!/bin/bash
 
-# Start all nutch daemons.  Run this on master node.
+# Start all hadoop daemons.  Run this on master node.
 
 bin=`dirname $0`
 bin=`cd $bin; pwd`
 
-$bin/nutch-daemons.sh start datanode
-$bin/nutch-daemon.sh start namenode
-$bin/nutch-daemon.sh start jobtracker
-$bin/nutch-daemons.sh start tasktracker
+$bin/hadoop-daemons.sh start datanode
+$bin/hadoop-daemon.sh start namenode
+$bin/hadoop-daemon.sh start jobtracker
+$bin/hadoop-daemons.sh start tasktracker

Copied: lucene/hadoop/trunk/bin/stop-all.sh (from r374710, lucene/nutch/trunk/bin/stop-all.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/stop-all.sh?p2=lucene/hadoop/trunk/bin/stop-all.sh&p1=lucene/nutch/trunk/bin/stop-all.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/stop-all.sh (original)
+++ lucene/hadoop/trunk/bin/stop-all.sh Fri Feb  3 11:45:32 2006
@@ -1,11 +1,11 @@
 #!/bin/bash
 
-# Stop all nutch daemons.  Run this on master node.
+# Stop all hadoop daemons.  Run this on master node.
 
 bin=`dirname $0`
 bin=`cd $bin; pwd`
 
-$bin/nutch-daemon.sh stop jobtracker
-$bin/nutch-daemons.sh stop tasktracker
-$bin/nutch-daemon.sh stop namenode
-$bin/nutch-daemons.sh stop datanode
+$bin/hadoop-daemon.sh stop jobtracker
+$bin/hadoop-daemons.sh stop tasktracker
+$bin/hadoop-daemon.sh stop namenode
+$bin/hadoop-daemons.sh stop datanode
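
Note the ordering: start-all.sh brings up the DFS daemons (datanodes, then the
namenode) before MapReduce (jobtracker, then tasktrackers), and stop-all.sh
roughly reverses it, taking the jobtracker down first.  Both run on the master:

    # assumes ~/.slaves (or $HADOOP_SLAVES) already lists the slave hosts
    bin/start-all.sh
    bin/stop-all.sh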

Copied: lucene/hadoop/trunk/build.xml (from r374710, lucene/nutch/trunk/build.xml)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/build.xml?p2=lucene/hadoop/trunk/build.xml&p1=lucene/nutch/trunk/build.xml&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Fri Feb  3 11:45:32 2006
@@ -1,13 +1,52 @@
 <?xml version="1.0"?>
 
-<project name="Nutch" default="compile">
+<project name="Hadoop" default="compile">
 
   <!-- Load all the default properties, and any the user wants    -->
   <!-- to contribute (without having to type -D or edit this file -->
   <property file="${user.home}/build.properties" />
   <property file="${basedir}/build.properties" />
-  <property file="${basedir}/default.properties" />
  
+  <property name="Name" value="Hadoop"/>
+  <property name="name" value="hadoop"/>
+  <property name="version" value="0.1-dev"/>
+  <property name="final.name" value="${name}-${version}"/>
+  <property name="year" value="2006"/>
+
+  <property name="basedir" value="./"/>
+  <property name="src.dir" value="src/java"/>
+  <property name="lib.dir" value="lib"/>
+  <property name="conf.dir" value="conf"/>
+  <property name="docs.dir" value="docs"/>
+  <property name="docs.src" value="${basedir}/src/web"/>
+
+  <property name="build.dir" value="build"/>
+  <property name="build.classes" value="${build.dir}/classes"/>
+  <property name="build.webapps" value="${build.dir}/webapps"/>
+  <property name="build.docs" value="${build.dir}/docs"/>
+  <property name="build.javadoc" value="${build.docs}/api"/>
+  <property name="build.encoding" value="ISO-8859-1"/>
+
+  <property name="test.src.dir" value="src/test"/>
+  <property name="test.build.dir" value="${build.dir}/test"/>
+  <property name="test.build.data" value=" ${test.build.dir}/data"/>
+  <property name="test.build.classes" value="${test.build.dir}/classes"/>
+  <property name="test.build.javadoc" value="${test.build.dir}/docs/api"/>
+
+  <property name="web.src.dir" value="src/web"/>
+  <property name="src.webapps" value="src/webapps"/>
+
+  <property name="javadoc.link.java"
+	    value="http://java.sun.com/j2se/1.4.2/docs/api/"/>
+  <property name="javadoc.packages" value="org.apache.hadoop.*"/>
+
+  <property name="dist.dir" value="${build.dir}/${final.name}"/>
+
+  <property name="javac.debug" value="on"/>
+  <property name="javac.optimize" value="on"/>
+  <property name="javac.deprecation" value="off"/>
+  <property name="javac.version" value="1.4"/>
+
   <!-- the normal classpath -->
   <path id="classpath">
     <pathelement location="${build.classes}"/>
@@ -17,21 +56,13 @@
   </path>
 
   <!-- the unit test classpath -->
-  <dirname property="plugins.classpath.dir" file="${build.plugins}"/>
   <path id="test.classpath">
     <pathelement location="${test.build.classes}" />
     <pathelement location="${conf.dir}"/>
     <pathelement location="${test.src.dir}"/>
-    <pathelement location="${plugins.classpath.dir}"/>
     <path refid="classpath"/>
   </path>
 
-  <!-- xmlcatalog definition for xslt task -->
-  <xmlcatalog id="docDTDs">
-     <dtd publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"            
-          location="${xmlcatalog.dir}/xhtml1-transitional.dtd"/> 
-  </xmlcatalog> 
-
   <!-- ====================================================== -->
   <!-- Stuff needed by all targets                            -->
   <!-- ====================================================== -->
@@ -55,20 +86,16 @@
       <fileset dir="${conf.dir}" includes="**/*.template"/>
       <mapper type="glob" from="*.template" to="*"/>
     </copy>
-
-
   </target>
 
   <!-- ====================================================== -->
   <!-- Compile the Java files                                 -->
   <!-- ====================================================== -->
-  <target name="compile" depends="compile-core, compile-plugins"/>
-
-  <target name="compile-core" depends="init">
+  <target name="compile" depends="init">
     <javac 
      encoding="${build.encoding}" 
      srcdir="${src.dir}"
-     includes="org/apache/nutch/**/*.java"
+     includes="org/apache/hadoop/**/*.java"
      destdir="${build.classes}"
      debug="${javac.debug}"
      optimize="${javac.optimize}"
@@ -79,32 +106,13 @@
     </javac>    
   </target>
 
-  <target name="compile-plugins">
-    <ant dir="src/plugin" target="deploy" inheritAll="false"/>
-  </target>
-
-  <target name="generate-src" depends="init">
-    <javacc target="${src.dir}/org/apache/nutch/analysis/NutchAnalysis.jj"
-            javacchome="${javacc.home}">
-    </javacc>
-
-    <fixcrlf srcdir="${src.dir}" eol="lf" includes="**/*.java"/>
-
-  </target>
-
-  <target name="dynamic" depends="generate-src, compile">
-  </target>
-
-
   <!-- ================================================================== -->
-  <!-- Make nutch.jar                                                     -->
+  <!-- Make hadoop.jar                                                     -->
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="jar" depends="compile-core">
-    <copy file="${conf.dir}/nutch-default.xml"
-          todir="${build.classes}"/>
-    <copy file="${conf.dir}/nutch-site.xml"
+  <target name="jar" depends="compile">
+    <copy file="${conf.dir}/hadoop-default.xml"
           todir="${build.classes}"/>
     <jar jarfile="${build.dir}/${final.name}.jar"
          basedir="${build.classes}">
@@ -114,44 +122,13 @@
   </target>
 
   <!-- ================================================================== -->
-  <!-- Make nutch.war                                                     -->
-  <!-- ================================================================== -->
-  <!--                                                                    -->
-  <!-- ================================================================== -->
-  <target name="war" depends="jar,compile,generate-docs">
-    <war destfile="${build.dir}/${final.name}.war"
-	 webxml="${web.src.dir}/web.xml">
-      <fileset dir="${web.src.dir}/jsp"/>
-      <zipfileset dir="${docs.src}" includes="include/*.html"/>
-      <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
-      <fileset dir="${docs.dir}"/>
-      <lib dir="${lib.dir}">
-	<include name="lucene*.jar"/>
-	<include name="taglibs-*.jar"/>
-	<include name="dom4j-*.jar"/>
-	<include name="xerces-*.jar"/>
-      </lib>
-      <lib dir="${build.dir}">
-	<include name="${final.name}.jar"/>
-      </lib>
-      <classes dir="${conf.dir}" excludes="**/*.template"/>
-      <classes dir="${web.src.dir}/locale"/>
-      <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>
-      <webinf dir="${lib.dir}">
-	<include name="taglibs-*.tld"/>
-      </webinf>
-    </war>
-   </target>
-
-
-  <!-- ================================================================== -->
   <!-- Compile test code                                                  --> 
   <!-- ================================================================== -->
-  <target name="compile-core-test" depends="compile-core">
+  <target name="compile-test" depends="compile">
     <javac 
      encoding="${build.encoding}" 
      srcdir="${test.src.dir}"
-     includes="org/apache/nutch/**/*.java"
+     includes="org/apache/hadoop/**/*.java"
      destdir="${test.build.classes}"
      debug="${javac.debug}"
      optimize="${javac.optimize}"
@@ -165,14 +142,12 @@
   <!-- ================================================================== -->
   <!-- Run unit tests                                                     --> 
   <!-- ================================================================== -->
-  <target name="test" depends="test-core, test-plugins"/>
-
-  <target name="test-core" depends="compile, compile-core-test">
+  <target name="test" depends="compile, compile-test">
 
     <delete dir="${test.build.data}"/>
     <mkdir dir="${test.build.data}"/>
 
-    <copy file="${test.src.dir}/nutch-site.xml"
+    <copy file="${test.src.dir}/hadoop-site.xml"
           todir="${test.build.classes}"/>
 
     <junit printsummary="yes" haltonfailure="no" fork="yes" dir="${basedir}"
@@ -194,10 +169,6 @@
 
   </target>   
 
-  <target name="test-plugins" depends="compile">
-    <ant dir="src/plugin" target="test" inheritAll="false"/>
-  </target>
-
   <target name="nightly" depends="test, tar">
   </target>
 
@@ -217,147 +188,14 @@
       bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
       >
     	<packageset dir="${src.dir}"/>
-    	<packageset dir="${plugins.dir}/protocol-file/src/java"/>
-    	<packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
-       	<packageset dir="${plugins.dir}/protocol-http/src/java"/>
-       	<packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
-    	<packageset dir="${plugins.dir}/parse-html/src/java"/>
-    	<packageset dir="${plugins.dir}/parse-js/src/java"/>
-    	<packageset dir="${plugins.dir}/parse-text/src/java"/>
-    	<packageset dir="${plugins.dir}/parse-pdf/src/java"/>
-<!--	<packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues-->
-<!--	<packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues-->
-    	<packageset dir="${plugins.dir}/parse-msword/src/java"/>
-    	<packageset dir="${plugins.dir}/index-basic/src/java"/>
-    	<packageset dir="${plugins.dir}/index-more/src/java"/>
-    	<packageset dir="${plugins.dir}/query-more/src/java"/>
-    	<packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
-    	<packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
-    	<packageset dir="${plugins.dir}/creativecommons/src/java"/>
-    	<packageset dir="${plugins.dir}/languageidentifier/src/java"/>
-    	<packageset dir="${plugins.dir}/clustering-carrot2/src/java"/>
-    	<packageset dir="${plugins.dir}/ontology/src/java"/>
         <link href="${javadoc.link.java}"/>
-        <link href="${javadoc.link.lucene}"/>
         <classpath refid="classpath"/>
-    	<classpath>
-    		<fileset dir="${plugins.dir}" >
-    			<include name="**/*.jar"/>
-    		</fileset>
-    	</classpath>
-    	<group title="Core" packages="org.apache.nutch.*"/>
-    	<group title="Plugins" packages="${plugins.packages}"/>
     </javadoc>
   </target>	
 	
   <target name="default-doc">
     <style basedir="${conf.dir}" destdir="${docs.dir}"
-           includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
-  </target>
-
-  <target name="generate-locale" if="doc.locale">
-    <echo message="Generating docs for locale=${doc.locale}"/>
-
-    <mkdir dir="${build.docs}/${doc.locale}/include"/>
-    <xslt in="${docs.src}/include/${doc.locale}/header.xml"
-          out="${build.docs}/${doc.locale}/include/header.html"
-          style="${docs.src}/style/nutch-header.xsl">
-        <xmlcatalog refid="docDTDs"/>
-    </xslt>
-
-    <dependset>
-       <srcfileset dir="${docs.src}/include/${doc.locale}" includes="*.xml"/>
-       <srcfileset dir="${docs.src}/style" includes="*.xsl"/>
-       <targetfileset dir="${docs.dir}/${doc.locale}" includes="*.html"/>
-    </dependset>  
-
-    <copy file="${docs.src}/style/nutch-page.xsl"
-          todir="${build.docs}/${doc.locale}"
-          preservelastmodified="true"/>
-
-    <xslt basedir="${docs.src}/pages/${doc.locale}"
-          destdir="${docs.dir}/${doc.locale}"
-          includes="*.xml"
-          style="${build.docs}/${doc.locale}/nutch-page.xsl">
-         <xmlcatalog refid="docDTDs"/>
-    </xslt>
-  </target>
-
-
-  <target name="generate-docs" depends="init">
-    <dependset>
-       <srcfileset dir="${docs.src}/include" includes="*.html"/>
-       <targetfileset dir="${docs.dir}" includes="**/*.html"/>
-    </dependset>  
-
-    <mkdir dir="${build.docs}/include"/>
-    <copy todir="${build.docs}/include">
-      <fileset dir="${docs.src}/include"/>
-    </copy>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="ca"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="de"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="en"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="es"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="fi"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="fr"/>
-    </antcall>
-    
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="hu"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="jp"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="ms"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="nl"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="pl"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="pt"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="sv"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="th"/>
-    </antcall>
-
-    <antcall target="generate-locale">
-      <param name="doc.locale" value="zh"/>
-    </antcall>
-
-    <fixcrlf srcdir="${docs.dir}" eol="lf" encoding="utf-8"
-             includes="**/*.html"/>
-
+           includes="hadoop-default.xml" style="conf/hadoop-conf.xsl"/>
   </target>
 
   <!-- ================================================================== -->
@@ -365,13 +203,12 @@
   <!-- ================================================================== -->
   <!--                                                                    -->
   <!-- ================================================================== -->
-  <target name="package" depends="jar, war, javadoc">
+  <target name="package" depends="jar, javadoc">
     <mkdir dir="${dist.dir}"/>
     <mkdir dir="${dist.dir}/lib"/>
     <mkdir dir="${dist.dir}/bin"/>
     <mkdir dir="${dist.dir}/docs"/>
     <mkdir dir="${dist.dir}/docs/api"/>
-    <mkdir dir="${dist.dir}/plugins"/>
 
     <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
       <fileset dir="lib"/>
@@ -381,14 +218,8 @@
       <fileset dir="${build.webapps}"/>
     </copy>
 
-    <copy todir="${dist.dir}/plugins">
-      <fileset dir="${build.plugins}"/>
-    </copy>
-
     <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
 
-    <copy file="${build.dir}/${final.name}.war" todir="${dist.dir}"/>
-
     <copy todir="${dist.dir}/bin">
       <fileset dir="bin"/>
     </copy>
@@ -400,10 +231,6 @@
     <chmod perm="ugo+x" type="file">
         <fileset dir="${dist.dir}/bin"/>
     </chmod>
-
-    <copy todir="${dist.dir}/docs">
-      <fileset dir="${docs.dir}"/>
-    </copy>
 
     <copy todir="${dist.dir}/docs/api">
       <fileset dir="${build.javadoc}"/>

Copied: lucene/hadoop/trunk/conf/hadoop-default.xml (from r374710, lucene/nutch/trunk/conf/nutch-default.xml)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?p2=lucene/hadoop/trunk/conf/hadoop-default.xml&p1=lucene/nutch/trunk/conf/nutch-default.xml&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Fri Feb  3 11:45:32 2006
@@ -7,110 +7,7 @@
 
 <nutch-conf>
 
-<!-- HTTP properties -->
-
-<property>
-  <name>http.agent.name</name>
-  <value>NutchCVS</value>
-  <description>Our HTTP 'User-Agent' request header.</description>
-</property>
-
-<property>
-  <name>http.robots.agents</name>
-  <value>NutchCVS,Nutch,*</value>
-  <description>The agent strings we'll look for in robots.txt files,
-  comma-separated, in decreasing order of precedence.</description>
-</property>
-
-<property>
-  <name>http.robots.403.allow</name>
-  <value>true</value>
-  <description>Some servers return HTTP status 403 (Forbidden) if
-  /robots.txt doesn't exist. This should probably mean that we are
-  allowed to crawl the site nonetheless. If this is set to false,
-  then such sites will be treated as forbidden.</description>
-</property>
-
-<property>
-  <name>http.agent.description</name>
-  <value>Nutch</value>
-  <description>Further description of our bot- this text is used in
-  the User-Agent header.  It appears in parenthesis after the agent name.
-  </description>
-</property>
-
-<property>
-  <name>http.agent.url</name>
-  <value>http://lucene.apache.org/nutch/bot.html</value>
-  <description>A URL to advertise in the User-Agent header.  This will 
-   appear in parenthesis after the agent name.
-  </description>
-</property>
-
-<property>
-  <name>http.agent.email</name>
-  <value>nutch-agent@lucene.apache.org</value>
-  <description>An email address to advertise in the HTTP 'From' request
-   header and User-Agent header.</description>
-</property>
-
-<property>
-  <name>http.agent.version</name>
-  <value>0.8-dev</value>
-  <description>A version string to advertise in the User-Agent 
-   header.</description>
-</property>
-
-<property>
-  <name>http.timeout</name>
-  <value>10000</value>
-  <description>The default network timeout, in milliseconds.</description>
-</property>
-
-<property>
-  <name>http.max.delays</name>
-  <value>100</value>
-  <description>The number of times a thread will delay when trying to
-  fetch a page.  Each time it finds that a host is busy, it will wait
-  fetcher.server.delay.  After http.max.delays attepts, it will give
-  up on the page for now.</description>
-</property>
-
-<property>
-  <name>http.content.limit</name>
-  <value>65536</value>
-  <description>The length limit for downloaded content, in bytes.
-  If this value is nonnegative (>=0), content longer than it will be truncated;
-  otherwise, no truncation at all.
-  </description>
-</property>
-
-<property>
-  <name>http.proxy.host</name>
-  <value></value>
-  <description>The proxy hostname.  If empty, no proxy is used.</description>
-</property>
-
-<property>
-  <name>http.proxy.port</name>
-  <value></value>
-  <description>The proxy port.</description>
-</property>
-
-<property>
-  <name>http.verbose</name>
-  <value>false</value>
-  <description>If true, HTTP will log more verbosely.</description>
-</property>
-
-<property>
-  <name>http.redirect.max</name>
-  <value>3</value>
-  <description>The maximum number of redirects the fetcher will follow when
-    trying to fetch a page.</description>
-</property>
-
-<!-- FILE properties -->
+<!-- file properties -->
 
 <property>
   <name>file.content.limit</name>
@@ -132,212 +29,6 @@
   </description>
 </property>
 
-<!-- FTP properties -->
-
-<property>
-  <name>ftp.username</name>
-  <value>anonymous</value>
-  <description>ftp login username.</description>
-</property>
-
-<property>
-  <name>ftp.password</name>
-  <value>anonymous@example.com</value>
-  <description>ftp login password.</description>
-</property>
-
-<property>
-  <name>ftp.content.limit</name>
-  <value>65536</value> 
-  <description>The length limit for downloaded content, in bytes.
-  If this value is larger than zero, content longer than it is truncated;
-  otherwise (zero or negative), no truncation at all. Caution: classical
-  ftp RFCs never defines partial transfer and, in fact, some ftp servers
-  out there do not handle client side forced close-down very well.
-  Our implementation tries its best to handle such situations smoothly.
-  </description>
-</property>
-
-<property>
-  <name>ftp.timeout</name>
-  <value>60000</value>
-  <description>Default timeout for ftp client socket, in millisec.
-  Please also see ftp.keep.connection below.</description>
-</property>
-
-<property>
-  <name>ftp.server.timeout</name>
-  <value>100000</value>
-  <description>An estimation of ftp server idle time, in millisec.
-  Typically it is 120000 millisec for many ftp servers out there.
-  Better be conservative here. Together with ftp.timeout, it is used to
-  decide if we need to delete (annihilate) current ftp.client instance and
-  force to start another ftp.client instance anew. This is necessary because
-  a fetcher thread may not be able to obtain next request from queue in time
-  (due to idleness) before our ftp client times out or remote server
-  disconnects. Used only when ftp.keep.connection is true (please see below).
-  </description>
-</property>
-
-<property>
-  <name>ftp.keep.connection</name>
-  <value>false</value>
-  <description>Whether to keep ftp connection. Useful if crawling same host
-  again and again. When set to true, it avoids connection, login and dir list
-  parser setup for subsequent urls. If it is set to true, however, you must
-  make sure (roughly):
-  (1) ftp.timeout is less than ftp.server.timeout
-  (2) ftp.timeout is larger than (fetcher.threads.fetch * fetcher.server.delay)
-  Otherwise there will be too many "delete client because idled too long"
-  messages in thread logs.</description>
-</property>
-
-<property>
-  <name>ftp.follow.talk</name>
-  <value>false</value>
-  <description>Whether to log dialogue between our client and remote
-  server. Useful for debugging.</description>
-</property>
-
-<!-- web db properties -->
-
-<property>
-  <name>db.default.fetch.interval</name>
-  <value>30</value>
-  <description>The default number of days between re-fetches of a page.
-  </description>
-</property>
-
-<property>
-  <name>db.ignore.internal.links</name>
-  <value>true</value>
-  <description>If true, when adding new links to a page, links from
-  the same host are ignored.  This is an effective way to limit the
-  size of the link database, keeping the only the highest quality
-  links.
-  </description>
-</property>
-
-<property>
-  <name>db.score.injected</name>
-  <value>1.0</value>
-  <description>The score of new pages added by the injector.
-  </description>
-</property>
-
-<property>
-  <name>db.score.link.external</name>
-  <value>1.0</value>
-  <description>The score factor for new pages added due to a link from
-  another host relative to the referencing page's score.
-  </description>
-</property>
-
-<property>
-  <name>db.score.link.internal</name>
-  <value>1.0</value>
-  <description>The score factor for pages added due to a link from the
-  same host, relative to the referencing page's score.
-  </description>
-</property>
-
-<property>
-  <name>db.max.outlinks.per.page</name>
-  <value>100</value>
-  <description>The maximum number of outlinks that we'll process for a page.
-  </description>
-</property>
-
-<property>
-  <name>db.max.anchor.length</name>
-  <value>100</value>
-  <description>The maximum number of characters permitted in an anchor.
-  </description>
-</property>
-
-<property>
-  <name>db.fetch.retry.max</name>
-  <value>3</value>
-  <description>The maximum number of times a url that has encountered
-  recoverable errors is generated for fetch.</description>
-</property>
-
-<property>
-  <name>db.signature.class</name>
-  <value>org.apache.nutch.crawl.MD5Signature</value>
-  <description>The default implementation of a page signature. Signatures
-  created with this implementation will be used for duplicate detection
-  and removal.</description>
-</property>
-
-<property>
-  <name>db.signature.text_profile.min_token_len</name>
-  <value>2</value>
-  <description>Minimum token length to be included in the signature.
-  </description>
-</property>
-
-<property>
-  <name>db.signature.text_profile.quant_rate</name>
-  <value>0.01</value>
-  <description>Profile frequencies will be rounded down to a multiple of
-  QUANT = (int)(QUANT_RATE * maxFreq), where maxFreq is a maximum token
-  frequency. If maxFreq > 1 then QUANT will be at least 2, which means that
-  for longer texts tokens with frequency 1 will always be discarded.
-  </description>
-</property>
-
-<!-- generate properties -->
-
-<property>
-  <name>generate.max.per.host</name>
-  <value>-1</value>
-  <description>The maximum number of urls per host in a single
-  fetchlist.  -1 if unlimited.</description>
-</property>
-
-<!-- fetcher properties -->
-
-<property>
-  <name>fetcher.server.delay</name>
-  <value>5.0</value>
-  <description>The number of seconds the fetcher will delay between 
-   successive requests to the same server.</description>
-</property>
-
-<property>
-  <name>fetcher.threads.fetch</name>
-  <value>10</value>
-  <description>The number of FetcherThreads the fetcher should use.
-    This is also determines the maximum number of requests that are 
-    made at once (each FetcherThread handles one connection).</description>
-</property>
-
-<property>
-  <name>fetcher.threads.per.host</name>
-  <value>1</value>
-  <description>This number is the maximum number of threads that
-    should be allowed to access a host at one time.</description>
-</property>
-
-<property>
-  <name>fetcher.verbose</name>
-  <value>false</value>
-  <description>If true, fetcher will log more verbosely.</description>
-</property>
-
-<property>
-  <name>fetcher.parse</name>
-  <value>true</value>
-  <description>If true, fetcher will parse content.</description>
-</property>
-
-<property>
-  <name>fetcher.store.content</name>
-  <value>true</value>
-  <description>If true, fetcher will store content.</description>
-</property>
-
 <!-- i/o properties -->
 
 <property>
@@ -393,35 +84,35 @@
   <name>fs.default.name</name>
   <value>local</value>
   <description>The name of the default file system.  Either the
-  literal string "local" or a host:port for NDFS.</description>
+  literal string "local" or a host:port for DFS.</description>
 </property>
 
 <property>
-  <name>ndfs.datanode.port</name>
+  <name>dfs.datanode.port</name>
   <value>50010</value>
-  <description>The port number that the ndfs datanode server uses as a starting 
+  <description>The port number that the dfs datanode server uses as a starting 
 	       point to look for a free port to listen on.
 </description>
 </property>
 
 <property>
-  <name>ndfs.name.dir</name>
-  <value>/tmp/nutch/ndfs/name</value>
-  <description>Determines where on the local filesystem the NDFS name node
+  <name>dfs.name.dir</name>
+  <value>/tmp/nutch/dfs/name</value>
+  <description>Determines where on the local filesystem the DFS name node
       should store the name table.</description>
 </property>
 
 <property>
-  <name>ndfs.data.dir</name>
-  <value>/tmp/nutch/ndfs/data</value>
-  <description>Determines where on the local filesystem an NDFS data node
+  <name>dfs.data.dir</name>
+  <value>/tmp/nutch/dfs/data</value>
+  <description>Determines where on the local filesystem an DFS data node
   should store its blocks.  If this is a comma- or space-delimited
   list of directories, then data will be stored in all named
   directories, typically on different devices.</description>
 </property>
 
 <property>
-  <name>ndfs.replication</name>
+  <name>dfs.replication</name>
   <value>3</value>
   <description>How many copies we try to have at all times. The actual
   number of replications is at max the number of datanodes in the
@@ -534,198 +225,6 @@
   combining them and writing to disk.</description>
 </property>
 
-<!-- indexer properties -->
-
-<property>
-  <name>indexer.score.power</name>
-  <value>0.5</value>
-  <description>Determines the power of link analyis scores.  Each
-  pages's boost is set to <i>score<sup>scorePower</sup></i> where
-  <i>score</i> is its link analysis score and <i>scorePower</i> is the
-  value of this parameter.  This is compiled into indexes, so, when
-  this is changed, pages must be re-indexed for it to take
-  effect.</description>
-</property>
-
-<property>
-  <name>indexer.max.title.length</name>
-  <value>100</value>
-  <description>The maximum number of characters of a title that are indexed.
-  </description>
-</property>
-
-<property>
-  <name>indexer.max.tokens</name>
-  <value>10000</value>
-  <description>
-  The maximum number of tokens that will be indexed for a single field
-  in a document. This limits the amount of memory required for
-  indexing, so that collections with very large files will not crash
-  the indexing process by running out of memory.
-
-  Note that this effectively truncates large documents, excluding
-  from the index tokens that occur further in the document. If you
-  know your source documents are large, be sure to set this value
-  high enough to accomodate the expected size. If you set it to
-  Integer.MAX_VALUE, then the only limit is your memory, but you
-  should anticipate an OutOfMemoryError.
-  </description>
-</property>
-
-<property>
-  <name>indexer.mergeFactor</name>
-  <value>50</value>
-  <description>The factor that determines the frequency of Lucene segment
-  merges. This must not be less than 2, higher values increase indexing
-  speed but lead to increased RAM usage, and increase the number of
-  open file handles (which may lead to "Too many open files" errors).
-  NOTE: the "segments" here have nothing to do with Nutch segments, they
-  are a low-level data unit used by Lucene.
-  </description>
-</property>
-
-<property>
-  <name>indexer.minMergeDocs</name>
-  <value>50</value>
-  <description>This number determines the minimum number of Lucene
-  Documents buffered in memory between Lucene segment merges. Larger
-  values increase indexing speed and increase RAM usage.
-  </description>
-</property>
-
-<property>
-  <name>indexer.maxMergeDocs</name>
-  <value>2147483647</value>
-  <description>This number determines the maximum number of Lucene
-  Documents to be merged into a new Lucene segment. Larger values
-  increase batch indexing speed and reduce the number of Lucene segments,
-  which reduces the number of open file handles; however, this also
-  decreases incremental indexing performance.
-  </description>
-</property>
-
-<property>
-  <name>indexer.termIndexInterval</name>
-  <value>128</value>
-  <description>Determines the fraction of terms which Lucene keeps in
-  RAM when searching, to facilitate random-access.  Smaller values use
-  more memory but make searches somewhat faster.  Larger values use
-  less memory but make searches somewhat slower.
-  </description>
-</property>
-
-
-<!-- analysis properties -->
-
-<property>
-  <name>analysis.common.terms.file</name>
-  <value>common-terms.utf8</value>
-  <description>The name of a file containing a list of common terms
-  that should be indexed in n-grams.</description>
-</property>
-
-<!-- searcher properties -->
-
-<property>
-  <name>searcher.dir</name>
-  <value>crawl</value>
-  <description>
-  Path to root of crawl.  This directory is searched (in
-  order) for either the file search-servers.txt, containing a list of
-  distributed search servers, or the directory "index" containing
-  merged indexes, or the directory "segments" containing segment
-  indexes.
-  </description>
-</property>
-
-<property>
-  <name>searcher.filter.cache.size</name>
-  <value>16</value>
-  <description>
-  Maximum number of filters to cache.  Filters can accelerate certain
-  field-based queries, like language, document format, etc.  Each
-  filter requires one bit of RAM per page.  So, with a 10 million page
-  index, a cache size of 16 consumes two bytes per page, or 20MB.
-  </description>
-</property>
-
-<property>
-  <name>searcher.filter.cache.threshold</name>
-  <value>0.05</value>
-  <description>
-  Filters are cached when their term is matched by more than this
-  fraction of pages.  For example, with a threshold of 0.05, and 10
-  million pages, the term must match more than 1/20, or 500,000 pages.
-  So, if out of 10 million pages, 50% of pages are in English, and 2%
-  are in Finnish, then, with a threshold of 0.05, searches for
-  "lang:en" will use a cached filter, while searches for "lang:fi"
-  will score all 200,000 Finnish documents.
-  </description>
-</property>
-
-<property>
-  <name>searcher.hostgrouping.rawhits.factor</name>
-  <value>2.0</value>
-  <description>
-  A factor that is used to determine the number of raw hits
-  initially fetched, before host grouping is done.
-  </description>
-</property>
-
-<property>
-  <name>searcher.summary.context</name>
-  <value>5</value>
-  <description>
-  The number of context terms to display preceding and following
-  matching terms in a hit summary.
-  </description>
-</property>
-
-<property>
-  <name>searcher.summary.length</name>
-  <value>20</value>
-  <description>
-  The total number of terms to display in a hit summary.
-  </description>
-</property>
-
-<property>
-  <name>searcher.max.hits</name>
-  <value>-1</value>
-  <description>If positive, search stops after this many hits are
-  found.  Setting this to small, positive values (e.g., 1000) can make
-  searches much faster.  With a sorted index, the quality of the hits
-  suffers little.</description>
-</property>
-
-<!-- URL normalizer properties -->
-
-<property>
-  <name>urlnormalizer.class</name>
-  <value>org.apache.nutch.net.BasicUrlNormalizer</value>
-  <description>Name of the class used to normalize URLs.</description>
-</property>
-
-<property>
-  <name>urlnormalizer.regex.file</name>
-  <value>regex-normalize.xml</value>
-  <description>Name of the config file used by the RegexUrlNormalizer class.</description>
-</property>
-
-<!-- mime properties -->
-
-<property>
-  <name>mime.types.file</name>
-  <value>mime-types.xml</value>
-  <description>Name of a file on the CLASSPATH that maps filename extensions
-  and magic sequences to MIME types.</description>
-</property>
-
-<property>
-  <name>mime.type.magic</name>
-  <value>true</value>
-  <description>Defines whether the MIME content type detector uses magic resolution.
-  </description>
-</property>
 
 <!-- ipc properties -->
 
@@ -733,220 +232,6 @@
   <name>ipc.client.timeout</name>
   <value>60000</value>
   <description>Defines the timeout for IPC calls in milliseconds.</description>
-</property>
-
-<!-- plugin properties -->
-
-<property>
-  <name>plugin.folders</name>
-  <value>plugins</value>
-  <description>Directories where nutch plugins are located.  Each
-  element may be a relative or absolute path.  If absolute, it is used
-  as is.  If relative, it is searched for on the classpath.</description>
-</property>
-
-<property>
-  <name>plugin.auto-activation</name>
-  <value>true</value>
-  <description>Defines whether plugins that are not activated by the
-  plugin.includes and plugin.excludes properties should be automatically
-  activated if they are needed by other activated plugins.
-  </description>
-</property>
-
-<property>
-  <name>plugin.includes</name>
-  <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)</value>
-  <description>Regular expression naming plugin directory names to
-  include.  Any plugin not matching this expression is excluded.
-  In any case you need to at least include the nutch-extensionpoints plugin.
-  By default Nutch includes plugins for crawling just HTML and plain text
-  via HTTP, and basic indexing and search plugins.
-  </description>
-</property>
-
-<property>
-  <name>plugin.excludes</name>
-  <value></value>
-  <description>Regular expression naming plugin directory names to exclude.  
-  </description>
-</property>
-
-<!-- parser properties -->
-
-<property>
-  <name>parse.plugin.file</name>
-  <value>parse-plugins.xml</value>
-  <description>The name of the file that defines the associations between
-  content-types and parsers.</description>
-</property>
-
-<property>
-  <name>parser.character.encoding.default</name>
-  <value>windows-1252</value>
-  <description>The character encoding to fall back to when no other information
-  is available.</description>
-</property>
-
-<property>
-  <name>parser.html.impl</name>
-  <value>neko</value>
-  <description>HTML Parser implementation. Currently the following keywords
-  are recognized: "neko" uses NekoHTML, "tagsoup" uses TagSoup.
-  </description>
-</property>
-
-<!-- urlfilter plugin properties -->
-
-<property>
-  <name>urlfilter.regex.file</name>
-  <value>regex-urlfilter.txt</value>
-  <description>Name of file on CLASSPATH containing regular expressions
-  used by urlfilter-regex (RegexURLFilter) plugin.</description>
-</property>
-
-<property>
-  <name>urlfilter.prefix.file</name>
-  <value>prefix-urlfilter.txt</value>
-  <description>Name of file on CLASSPATH containing url prefixes
-  used by urlfilter-prefix (PrefixURLFilter) plugin.</description>
-</property>
-
-<property>
-  <name>urlfilter.order</name>
-  <value></value>
-  <description>The order in which URL filters are applied.
-  If empty, all available URL filters (as dictated by the properties
-  plugin.includes and plugin.excludes above) are loaded and applied in
-  system-defined order. If not empty, only the named filters are loaded
-  and applied in the given order. For example, if this property has the value:
-  org.apache.nutch.net.RegexURLFilter org.apache.nutch.net.PrefixURLFilter
-  then RegexURLFilter is applied first, and PrefixURLFilter second.
-  Since all filters are AND'ed, filter ordering has no impact on the
-  end result, but it may have performance implications, depending
-  on the relative cost of the filters.
-  </description>
-</property>
-
-<!-- clustering extension properties -->
-
-<property>
-  <name>extension.clustering.hits-to-cluster</name>
-  <value>100</value>
-  <description>Number of snippets retrieved for the clustering extension
-  if the clustering extension is available and the user requested that
-  results be clustered.</description>
-</property>
-
-<property>
-  <name>extension.clustering.extension-name</name>
-  <value></value>
-  <description>Use the specified online clustering extension. If empty,
-  the first available extension will be used. The "name" here refers to an 'id'
-  attribute of the 'implementation' element in the plugin descriptor XML
-  file.</description>
-</property>
-
-<!-- ontology extension properties -->
-
-<property>
-  <name>extension.ontology.extension-name</name>
-  <value></value>
-  <description>Use the specified online ontology extension. If empty,
-  the first available extension will be used. The "name" here refers to an 'id'
-  attribute of the 'implementation' element in the plugin descriptor XML
-  file.</description>
-</property>
-
-<property>
-  <name>extension.ontology.urls</name>
-  <value>
-  </value>
-  <description>URLs of OWL files, separated by spaces, such as
-  http://www.example.com/ontology/time.owl
-  http://www.example.com/ontology/space.owl
-  http://www.example.com/ontology/wine.owl
-  Or
-  file:/ontology/time.owl
-  file:/ontology/space.owl
-  file:/ontology/wine.owl
-  You have to make sure each URL is valid.
-  By default, there is no OWL file, so query refinement based on ontology
-  is silently ignored.
-  </description>
-</property>
-
-<!-- query-basic plugin properties -->
-
-<property>
-  <name>query.url.boost</name>
-  <value>4.0</value>
-  <description> Used as a boost for the url field in the Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.anchor.boost</name>
-  <value>2.0</value>
-  <description> Used as a boost for the anchor field in the Lucene query.
-  </description>
-</property>
-
-
-<property>
-  <name>query.title.boost</name>
-  <value>1.5</value>
-  <description> Used as a boost for the title field in the Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.host.boost</name>
-  <value>2.0</value>
-  <description> Used as a boost for the host field in the Lucene query.
-  </description>
-</property>
-
-<property>
-  <name>query.phrase.boost</name>
-  <value>1.0</value>
-  <description> Used as a boost for a phrase in the Lucene query.
-  Multiplied by the boost of the field the phrase is matched in.
-  </description>
-</property>
-
-<!-- language-identifier plugin properties -->
-
-<property>
-  <name>lang.ngram.min.length</name>
-  <value>1</value>
-  <description> The minimum size of n-grams used to identify the
-  language (must be between 1 and lang.ngram.max.length).
-  The larger the range between lang.ngram.min.length and
-  lang.ngram.max.length, the better the identification, but
-  the slower it is.
-  </description>
-</property>
-
-<property>
-  <name>lang.ngram.max.length</name>
-  <value>4</value>
-  <description> The maximum size of n-grams used to identify the
-  language (must be between lang.ngram.min.length and 4).
-  The larger the range between lang.ngram.min.length and
-  lang.ngram.max.length, the better the identification, but
-  the slower it is.
-  </description>
-</property>
-
-<property>
-  <name>lang.analyze.max.length</name>
-  <value>2048</value>
-  <description> The maximum number of bytes of data used to identify
-  the language (0 means full content analysis).
-  The larger this value, the better the analysis, but the
-  slower it is.
-  </description>
 </property>
 
 </nutch-conf>
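
Two of the renamed DFS properties above, fs.default.name and dfs.data.dir,
are the ones a site file would typically override.  A minimal sketch of such
a site file, using the same <nutch-conf> root element this defaults file
still carries; the host, port, and paths are illustrative placeholders, not
values from this commit:

<nutch-conf>

<property>
  <name>fs.default.name</name>
  <value>namenode.example.com:9000</value>
  <description>Illustrative host:port; any reachable name node works.</description>
</property>

<property>
  <name>dfs.data.dir</name>
  <value>/d1/dfs/data,/d2/dfs/data</value>
  <description>Comma-delimited, so blocks are spread across both
  directories, typically on different devices.</description>
</property>

</nutch-conf>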

Copied: lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java (from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java?p2=lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java Fri Feb  3 11:45:32 2006
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-package org.apache.nutch.util;
+package org.apache.hadoop.conf;
 
-/** Something that may be configured with a {@link NutchConf}. */
-public interface NutchConfigurable {
+/** Something that may be configured with a {@link Configuration}. */
+public interface Configurable {
 
   /** Set the configuration to be used by this object. */
-  void setConf(NutchConf conf);
+  void setConf(Configuration conf);
 
   /** Return the configuration used by this object. */
-  NutchConf getConf();
+  Configuration getConf();
 }

Copied: lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java (from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java?p2=lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java Fri Feb  3 11:45:32 2006
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-package org.apache.nutch.util;
+package org.apache.hadoop.conf;
 
 import java.util.*;
 import java.net.URL;
@@ -23,14 +23,13 @@
 
 import javax.xml.parsers.*;
 
-import org.apache.nutch.plugin.PluginRepository;
 import org.w3c.dom.*;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 
-/** Provides access to Nutch configuration parameters.
+/** Provides access to configuration parameters.
  * <p>An ordered list of configuration parameter files with
 * default and always-overriding site parameters.
  * <p>Default values for all parameters are specified in a file named
@@ -42,25 +41,23 @@
  * or write) after {@link #addConfResource(String)} or
  * {@link #addConfResource(File)}.
  */
-public class NutchConf {
+public class Configuration {
   private static final Logger LOG =
-    LogFormatter.getLogger("org.apache.nutch.util.NutchConf");
+    LogFormatter.getLogger("org.apache.hadoop.conf.Configuration");
 
   private ArrayList resourceNames = new ArrayList();
   private Properties properties;
   private ClassLoader classLoader = 
     Thread.currentThread().getContextClassLoader();
 
-  private PluginRepository pluginRepository;
-  
   /** A new configuration. */
-  public NutchConf() {
+  public Configuration() {
     resourceNames.add("nutch-default.xml");
     resourceNames.add("nutch-site.xml");
   }
 
   /** A new configuration with the same settings cloned from another. */
-  public NutchConf(NutchConf other) {
+  public Configuration(Configuration other) {
     this.resourceNames = (ArrayList)other.resourceNames.clone();
     if (other.properties != null)
       this.properties = (Properties)other.properties.clone();
@@ -89,16 +86,6 @@
   }
   
   /**
-   * @return a cached instance of the plugin repository
-   */
-  public PluginRepository getPluginRepository() {
-    if (this.pluginRepository == null) {
-      this.pluginRepository = new PluginRepository(this);
-    }
-    return this.pluginRepository;
-  }
-
-  /**
    * Returns the value of the <code>name</code> property, or null if no such
    * property exists.
    */
@@ -434,7 +421,7 @@
 
   public String toString() {
     StringBuffer sb = new StringBuffer(resourceNames.size()*30);
-    sb.append("NutchConf: ");
+    sb.append("Configuration: ");
     ListIterator i = resourceNames.listIterator();
     while (i.hasNext()) {
       if (i.nextIndex() != 0) {
@@ -452,7 +439,7 @@
 
   /** For debugging.  List non-default properties to the terminal and exit. */
   public static void main(String[] args) throws Exception {
-    new NutchConf().write(System.out);
+    new Configuration().write(System.out);
   }
 
 }
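
A usage sketch against the methods visible in this diff (get, getInt, and
write are all exercised elsewhere in this commit); the class name below is
illustrative:

    import org.apache.hadoop.conf.Configuration;

    public class ConfDump {
        public static void main(String[] args) throws Exception {
            // Still reads nutch-default.xml, then nutch-site.xml
            // overrides, per the resource names left in the constructor.
            Configuration conf = new Configuration();
            String fs   = conf.get("fs.default.name", "local");
            int    port = conf.getInt("dfs.datanode.port", 50010);
            System.out.println(fs + " " + port);
            conf.write(System.out);   // as in Configuration.main() above
        }
    }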

Copied: lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java (from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java?p2=lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java Fri Feb  3 11:45:32 2006
@@ -14,25 +14,25 @@
  * limitations under the License.
  */
 
-package org.apache.nutch.util;
+package org.apache.hadoop.conf;
 
-/** Base class for things that may be configured with a {@link NutchConf}. */
-public class NutchConfigured implements NutchConfigurable {
+/** Base class for things that may be configured with a {@link Configuration}. */
+public class Configured implements Configurable {
 
-  private NutchConf conf;
+  private Configuration conf;
 
-  /** Construct a NutchConfigured. */
-  public NutchConfigured(NutchConf conf) {
+  /** Construct a Configured. */
+  public Configured(Configuration conf) {
     setConf(conf);
   }
 
   // inherit javadoc
-  public void setConf(NutchConf conf) {
+  public void setConf(Configuration conf) {
     this.conf = conf;
   }
 
   // inherit javadoc
-  public NutchConf getConf() {
+  public Configuration getConf() {
     return conf;
   }
 

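A sketch of how a class would extend the renamed base class, using only the
constructor and accessors shown above; the subclass and its property lookup
are illustrative, not part of this commit:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;

    public class ReplicationChecker extends Configured {
        public ReplicationChecker(Configuration conf) {
            super(conf);              // stores conf via setConf()
        }

        public int targetReplication() {
            // dfs.replication defaults to 3 in hadoop-default.xml above
            return getConf().getInt("dfs.replication", 3);
        }
    }
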
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java Fri Feb  3 11:45:32 2006
@@ -13,9 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 
 import java.io.*;
 import java.util.*;

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java Fri Feb  3 11:45:32 2006
@@ -13,9 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 
 import java.io.*;
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Fri Feb  3 11:45:32 2006
@@ -13,13 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
 import java.io.*;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 
 /**********************************************************************
- * Protocol that an NDFS client uses to communicate with the NameNode.
+ * Protocol that a DFS client uses to communicate with the NameNode.
  * It's used to manipulate the namespace, and obtain datanode info.
  *
  * @author Mike Cafarella
@@ -126,7 +126,7 @@
     /**
      * Get a listing of the indicated directory
      */
-    public NDFSFileInfo[] getListing(String src) throws IOException;
+    public DFSFileInfo[] getListing(String src) throws IOException;
 
     /**
      * Get a set of statistics about the filesystem.

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java Fri Feb  3 11:45:32 2006
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
 import java.io.File;
 import java.io.IOException;

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Fri Feb  3 11:45:32 2006
@@ -13,11 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.ipc.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.ipc.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
 
 import java.io.*;
 import java.net.*;
@@ -36,12 +37,12 @@
  * @author Mike Cafarella
  **********************************************************/
 public class DataNode implements FSConstants, Runnable {
-    public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.ndfs.DataNode");
+    public static final Logger LOG = LogFormatter.getLogger("org.apache.hadoop.dfs.DataNode");
   //
     // REMIND - mjc - I might bring "maxgigs" back so user can place 
     // artificial  limit on space
     //private static final long GIGABYTE = 1024 * 1024 * 1024;
-    //private static long numGigs = NutchConf.get().getLong("ndfs.datanode.maxgigs", 100);
+    //private static long numGigs = Configuration.get().getLong("dfs.datanode.maxgigs", 100);
     //
 
     /**
@@ -70,12 +71,12 @@
     Daemon dataXceiveServer = null;
     long blockReportInterval;
     private long datanodeStartupPeriod;
-    private NutchConf fConf;
+    private Configuration fConf;
 
     /**
      * Create given a configuration and a dataDir.
      */
-    public DataNode(NutchConf conf, String datadir) throws IOException {
+    public DataNode(Configuration conf, String datadir) throws IOException {
         this(InetAddress.getLocalHost().getHostName(), 
              new File(datadir),
              createSocketAddr(conf.get("fs.default.name", "local")), conf);
@@ -84,12 +85,12 @@
     /**
      * Needs a directory to find its data (and config info)
      */
-    public DataNode(String machineName, File datadir, InetSocketAddress nameNodeAddr, NutchConf conf) throws IOException {
+    public DataNode(String machineName, File datadir, InetSocketAddress nameNodeAddr, Configuration conf) throws IOException {
         this.namenode = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, nameNodeAddr, conf);
         this.data = new FSDataset(datadir, conf);
 
         ServerSocket ss = null;
-        int tmpPort = conf.getInt("ndfs.datanode.port", 50010);
+        int tmpPort = conf.getInt("dfs.datanode.port", 50010);
         while (ss == null) {
             try {
                 ss = new ServerSocket(tmpPort);
@@ -104,11 +105,11 @@
         this.dataXceiveServer.start();
 
         long blockReportIntervalBasis =
-          conf.getLong("ndfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
+          conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
         this.blockReportInterval =
           blockReportIntervalBasis - new Random().nextInt((int)(blockReportIntervalBasis/10));
         this.datanodeStartupPeriod =
-          conf.getLong("ndfs.datanode.startupMsec", DATANODE_STARTUP_PERIOD);
+          conf.getLong("dfs.datanode.startupMsec", DATANODE_STARTUP_PERIOD);
     }
 
     /**
@@ -676,10 +677,10 @@
 
     /** Start datanode daemons.
      * Start a datanode daemon for each comma separated data directory
-     * specified in property ndfs.data.dir
+     * specified in property dfs.data.dir
      */
-    public static void run(NutchConf conf) throws IOException {
-        String[] dataDirs = conf.getStrings("ndfs.data.dir");
+    public static void run(Configuration conf) throws IOException {
+        String[] dataDirs = conf.getStrings("dfs.data.dir");
         subThreadList = new Vector(dataDirs.length);
         for (int i = 0; i < dataDirs.length; i++) {
           DataNode dn = makeInstanceForDir(dataDirs[i], conf);
@@ -694,10 +695,10 @@
 
   /** Start datanode daemons.
    * Start a datanode daemon for each comma separated data directory
-   * specified in property ndfs.data.dir and wait for them to finish.
+   * specified in property dfs.data.dir and wait for them to finish.
    * If this thread is specifically interrupted, it will stop waiting.
    */
-  private static void runAndWait(NutchConf conf) throws IOException {
+  private static void runAndWait(Configuration conf) throws IOException {
     run(conf);
 
     //  Wait for sub threads to exit
@@ -718,12 +719,12 @@
    * Make an instance of DataNode after ensuring that given data directory
    * (and parent directories, if necessary) can be created.
    * @param dataDir where the new DataNode instance should keep its files.
-   * @param conf NutchConf instance to use.
+   * @param conf Configuration instance to use.
    * @return DataNode instance for given data dir and conf, or null if directory
    * cannot be created.
    * @throws IOException
    */
-  static DataNode makeInstanceForDir(String dataDir, NutchConf conf) throws IOException {
+  static DataNode makeInstanceForDir(String dataDir, Configuration conf) throws IOException {
     DataNode dn = null;
     File data = new File(dataDir);
     data.mkdirs();
@@ -748,6 +749,6 @@
      */
     public static void main(String args[]) throws IOException {
         LogFormatter.setShowThreadIDs(true);
-        runAndWait(new NutchConf());
+        runAndWait(new Configuration());
     }
 }
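
The constructor above scans upward from dfs.datanode.port until a
ServerSocket binds.  A standalone sketch of that pattern; the retry step and
the upper bound are assumptions, since the catch clause falls outside the
hunks shown:

    import java.io.IOException;
    import java.net.ServerSocket;

    public class PortProbe {
        /** Bind to the first free port at or above start (assumed policy). */
        static ServerSocket bind(int start) throws IOException {
            for (int port = start; port < start + 1000; port++) {
                try {
                    return new ServerSocket(port);
                } catch (IOException e) {
                    // assumed: port in use, try the next one
                }
            }
            throw new IOException("no free port near " + start);
        }

        public static void main(String[] args) throws IOException {
            ServerSocket ss = bind(50010);   // dfs.datanode.port default
            System.out.println("bound to port " + ss.getLocalPort());
            ss.close();
        }
    }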

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java Fri Feb  3 11:45:32 2006
@@ -13,10 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
 
 import java.io.*;
 import java.util.*;

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java Fri Feb  3 11:45:32 2006
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
 import java.io.*;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 
 /**********************************************************************
- * Protocol that an NDFS datanode uses to communicate with the NameNode.
+ * Protocol that a DFS datanode uses to communicate with the NameNode.
  * It's used to upload current load information and block records.
  *
  * @author Michael Cafarella

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java Fri Feb  3 11:45:32 2006
@@ -13,9 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 /************************************
  * Some handy constants
@@ -107,8 +107,8 @@
     public static long LEASE_PERIOD = 60 * 1000;
     public static int READ_TIMEOUT = 60 * 1000;
 
-    //TODO mb@media-style.com: should be nutchConf injected?
-    public static final int BUFFER_SIZE = new NutchConf().getInt("io.file.buffer.size", 4096);
+    //TODO mb@media-style.com: should be conf injected?
+    public static final int BUFFER_SIZE = new Configuration().getInt("io.file.buffer.size", 4096);
 
 }
 

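The TODO kept above asks whether BUFFER_SIZE should be conf-injected rather
than read from a freshly constructed Configuration.  One hedged sketch of
that direction; the helper class is hypothetical and not part of this commit:

    import org.apache.hadoop.conf.Configuration;

    public class Buffers {
        /** Hypothetical: read the buffer size from the caller's conf
         *  instead of building a new Configuration per lookup. */
        public static int bufferSize(Configuration conf) {
            return conf.getInt("io.file.buffer.size", 4096);
        }
    }
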
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java Fri Feb  3 11:45:32 2006
@@ -13,13 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
 
 import java.io.*;
 import java.util.*;
 
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
 
 /**************************************************
  * FSDataset manages a set of data blocks.  Each block
@@ -191,7 +191,7 @@
     /**
      * An FSDataset has a directory where it loads its data files.
      */
-    public FSDataset(File dir, NutchConf nutchConf) throws IOException {
+    public FSDataset(File dir, Configuration conf) throws IOException {
         this.dirpath = dir.getCanonicalPath();
         this.data = new File(dir, "data");
         if (! data.exists()) {
@@ -199,7 +199,7 @@
         }
         this.tmp = new File(dir, "tmp");
         if (tmp.exists()) {
-            FileUtil.fullyDelete(tmp, nutchConf);
+            FileUtil.fullyDelete(tmp, conf);
         }
         this.tmp.mkdirs();
         this.dirTree = new FSDir(data);