Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/02/03 20:45:51 UTC
svn commit: r374733 [1/4] - in /lucene/hadoop/trunk: ./ bin/ conf/ lib/
lib/jetty-ext/ src/java/ src/java/org/ src/java/org/apache/
src/java/org/apache/hadoop/ src/java/org/apache/hadoop/conf/
src/java/org/apache/hadoop/dfs/ src/java/org/apache/hadoop/...
Author: cutting
Date: Fri Feb 3 11:45:32 2006
New Revision: 374733
URL: http://svn.apache.org/viewcvs?rev=374733&view=rev
Log:
Initial commit of code copied from Nutch.
Added:
lucene/hadoop/trunk/bin/
lucene/hadoop/trunk/bin/hadoop
- copied, changed from r374710, lucene/nutch/trunk/bin/nutch
lucene/hadoop/trunk/bin/hadoop-daemon.sh
- copied, changed from r374710, lucene/nutch/trunk/bin/nutch-daemon.sh
lucene/hadoop/trunk/bin/hadoop-daemons.sh
- copied, changed from r374710, lucene/nutch/trunk/bin/nutch-daemons.sh
lucene/hadoop/trunk/bin/slaves.sh
- copied, changed from r374710, lucene/nutch/trunk/bin/slaves.sh
lucene/hadoop/trunk/bin/start-all.sh
- copied, changed from r374710, lucene/nutch/trunk/bin/start-all.sh
lucene/hadoop/trunk/bin/stop-all.sh
- copied, changed from r374710, lucene/nutch/trunk/bin/stop-all.sh
lucene/hadoop/trunk/build.xml
- copied, changed from r374710, lucene/nutch/trunk/build.xml
lucene/hadoop/trunk/conf/
lucene/hadoop/trunk/conf/hadoop-default.xml
- copied, changed from r374710, lucene/nutch/trunk/conf/nutch-default.xml
lucene/hadoop/trunk/conf/hadoop-site.xml.template
- copied unchanged from r374710, lucene/nutch/trunk/conf/nutch-site.xml.template
lucene/hadoop/trunk/conf/mapred-default.xml.template
- copied unchanged from r374710, lucene/nutch/trunk/conf/mapred-default.xml.template
lucene/hadoop/trunk/lib/
lucene/hadoop/trunk/lib/jetty-5.1.4.LICENSE.txt
- copied unchanged from r374710, lucene/nutch/trunk/lib/jetty-5.1.4.LICENSE.txt
lucene/hadoop/trunk/lib/jetty-5.1.4.jar
- copied unchanged from r374710, lucene/nutch/trunk/lib/jetty-5.1.4.jar
lucene/hadoop/trunk/lib/jetty-ext/
- copied from r374710, lucene/nutch/trunk/lib/jetty-ext/
lucene/hadoop/trunk/lib/junit-3.8.1.LICENSE.txt
- copied unchanged from r374710, lucene/nutch/trunk/lib/junit-3.8.1.LICENSE.txt
lucene/hadoop/trunk/lib/junit-3.8.1.jar
- copied unchanged from r374710, lucene/nutch/trunk/lib/junit-3.8.1.jar
lucene/hadoop/trunk/lib/servlet-api.jar
- copied unchanged from r374710, lucene/nutch/trunk/lib/servlet-api.jar
lucene/hadoop/trunk/src/java/
lucene/hadoop/trunk/src/java/org/
lucene/hadoop/trunk/src/java/org/apache/
lucene/hadoop/trunk/src/java/org/apache/hadoop/
lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/
lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java
- copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java
- copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java
- copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/
- copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/
- copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/fs/
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/
- copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/io/
lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/
- copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/ipc/
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/
- copied from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/mapred/
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/Daemon.java
- copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/LogFormatter.java
- copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/LogFormatter.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/util/Progress.java
- copied, changed from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/Progress.java
Modified:
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/LocatedBlock.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NDFSClient.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NDFSFile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NDFSFileInfo.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/ChecksumException.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FSError.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/FileUtil.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/LocalFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NDFSFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NDFSShell.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSDataInputStream.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSDataOutputStream.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSInputStream.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NFSOutputStream.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/NutchFileSystem.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/fs/Seekable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/ArrayFile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/ArrayWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/BooleanWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/BytesWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/CompressedWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/DataInputBuffer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/DataOutputBuffer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/FloatWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/IntWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/LongWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MD5Hash.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/NullWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/ObjectWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SequenceFile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/SetFile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/TwoDArrayWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/UTF8.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/VersionMismatchException.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/VersionedWritable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/Writable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableComparable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableComparator.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableName.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/io/WritableUtils.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Client.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/RPC.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/ipc/Server.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/CombiningCollector.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/FileSplit.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InterTrackerProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConf.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobConfigurable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobProfile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobStatus.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobSubmissionProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTrackerInfoServer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/LocalJobRunner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MRConstants.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapFileOutputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapOutputFile.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapOutputLocation.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapOutputProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunnable.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapRunner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTask.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/MapTaskRunner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Mapper.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/OutputCollector.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/OutputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Partitioner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordReader.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RecordWriter.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTask.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/ReduceTaskRunner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Reducer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Reporter.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/RunningJob.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileInputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileOutputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/SequenceFileRecordReader.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/Task.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskInProgress.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskStatus.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTrackerStatus.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextInputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TextOutputFormat.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/demo/Grep.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/HashPartitioner.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/IdentityMapper.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/IdentityReducer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/InverseMapper.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/LongSumReducer.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/RegexMapper.java
lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java
Copied: lucene/hadoop/trunk/bin/hadoop (from r374710, lucene/nutch/trunk/bin/nutch)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop?p2=lucene/hadoop/trunk/bin/hadoop&p1=lucene/nutch/trunk/bin/nutch&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch (original)
+++ lucene/hadoop/trunk/bin/hadoop Fri Feb 3 11:45:32 2006
@@ -1,15 +1,15 @@
#!/bin/bash
#
-# The Nutch command script
+# The Hadoop command script
#
# Environment Variables
#
-# NUTCH_JAVA_HOME The java implementation to use. Overrides JAVA_HOME.
+# JAVA_HOME The java implementation to use.
#
-# NUTCH_HEAPSIZE The maximum amount of heap to use, in MB.
+# HADOOP_HEAPSIZE The maximum amount of heap to use, in MB.
# Default is 1000.
#
-# NUTCH_OPTS Extra Java runtime options.
+# HADOOP_OPTS Extra Java runtime options.
#
# resolve links - $0 may be a softlink
@@ -26,26 +26,11 @@
# if no args specified, show usage
if [ $# = 0 ]; then
- echo "Usage: nutch COMMAND"
+ echo "Usage: hadoop COMMAND"
echo "where COMMAND is one of:"
- echo " crawl one-step crawler for intranets"
- echo " readdb read / dump crawl db"
- echo " readlinkdb read / dump link db"
- echo " inject inject new urls into the database"
- echo " generate generate new segments to fetch"
- echo " fetch fetch a segment's pages"
- echo " parse parse a segment's pages"
- echo " segread read / dump segment data"
- echo " updatedb update crawl db from segments after fetching"
- echo " invertlinks create a linkdb from parsed segments"
- echo " index run the indexer on parsed segments and linkdb"
- echo " merge merge several segment indexes"
- echo " dedup remove duplicates from a set of segment indexes"
- echo " plugin load a plugin and run one of its classes main()"
- echo " server run a search server"
- echo " namenode run the NDFS namenode"
- echo " datanode run an NDFS datanode"
- echo " ndfs run an NDFS admin client"
+ echo " namenode run the DFS namenode"
+ echo " datanode run an DFS datanode"
+ echo " dfs run an DFS admin client"
echo " jobtracker run the MapReduce job Tracker node"
echo " tasktracker run a MapReduce task Tracker node"
echo " job manipulate MapReduce jobs"
@@ -61,12 +46,12 @@
# some directories
THIS_DIR=`dirname "$THIS"`
-NUTCH_HOME=`cd "$THIS_DIR/.." ; pwd`
+HADOOP_HOME=`cd "$THIS_DIR/.." ; pwd`
# some Java parameters
-if [ "$NUTCH_JAVA_HOME" != "" ]; then
- #echo "run java in $NUTCH_JAVA_HOME"
- JAVA_HOME=$NUTCH_JAVA_HOME
+if [ "$JAVA_HOME" != "" ]; then
+ #echo "run java in $JAVA_HOME"
+ JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
@@ -78,46 +63,46 @@
JAVA_HEAP_MAX=-Xmx1000m
# check envvars which might override default args
-if [ "$NUTCH_HEAPSIZE" != "" ]; then
- #echo "run with heapsize $NUTCH_HEAPSIZE"
- JAVA_HEAP_MAX="-Xmx""$NUTCH_HEAPSIZE""m"
+if [ "$HADOOP_HEAPSIZE" != "" ]; then
+ #echo "run with heapsize $HADOOP_HEAPSIZE"
+ JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
#echo $JAVA_HEAP_MAX
fi
-# CLASSPATH initially contains $NUTCH_CONF_DIR, or defaults to $NUTCH_HOME/conf
-CLASSPATH=${NUTCH_CONF_DIR:=$NUTCH_HOME/conf}
+# CLASSPATH initially contains $HADOOP_CONF_DIR, or defaults to $HADOOP_HOME/conf
+CLASSPATH=${HADOOP_CONF_DIR:=$HADOOP_HOME/conf}
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
-# for developers, add Nutch classes to CLASSPATH
-if [ -d "$NUTCH_HOME/build/classes" ]; then
- CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build/classes
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_HOME/build/classes" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
fi
-if [ -d "$NUTCH_HOME/build/plugins" ]; then
- CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build
+if [ -d "$HADOOP_HOME/build/plugins" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
fi
-if [ -d "$NUTCH_HOME/build/test/classes" ]; then
- CLASSPATH=${CLASSPATH}:$NUTCH_HOME/build/test/classes
+if [ -d "$HADOOP_HOME/build/test/classes" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
fi
# so that filenames w/ spaces are handled correctly in loops below
IFS=
-# for releases, add Nutch jar to CLASSPATH
-for f in $NUTCH_HOME/nutch-*.jar; do
+# for releases, add Hadoop jar to CLASSPATH
+for f in $HADOOP_HOME/hadoop-*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
# add plugins to classpath
-if [ -d "$NUTCH_HOME/plugins" ]; then
- CLASSPATH=${CLASSPATH}:$NUTCH_HOME
+if [ -d "$HADOOP_HOME/plugins" ]; then
+ CLASSPATH=${CLASSPATH}:$HADOOP_HOME
fi
# add libs to CLASSPATH
-for f in $NUTCH_HOME/lib/*.jar; do
+for f in $HADOOP_HOME/lib/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
-for f in $NUTCH_HOME/lib/jetty-ext/*.jar; do
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
@@ -125,48 +110,18 @@
unset IFS
# figure out which class to run
-if [ "$COMMAND" = "crawl" ] ; then
- CLASS=org.apache.nutch.crawl.Crawl
-elif [ "$COMMAND" = "inject" ] ; then
- CLASS=org.apache.nutch.crawl.Injector
-elif [ "$COMMAND" = "generate" ] ; then
- CLASS=org.apache.nutch.crawl.Generator
-elif [ "$COMMAND" = "fetch" ] ; then
- CLASS=org.apache.nutch.fetcher.Fetcher
-elif [ "$COMMAND" = "parse" ] ; then
- CLASS=org.apache.nutch.parse.ParseSegment
-elif [ "$COMMAND" = "readdb" ] ; then
- CLASS=org.apache.nutch.crawl.CrawlDbReader
-elif [ "$COMMAND" = "readlinkdb" ] ; then
- CLASS=org.apache.nutch.crawl.LinkDbReader
-elif [ "$COMMAND" = "segread" ] ; then
- CLASS=org.apache.nutch.segment.SegmentReader
-elif [ "$COMMAND" = "updatedb" ] ; then
- CLASS=org.apache.nutch.crawl.CrawlDb
-elif [ "$COMMAND" = "invertlinks" ] ; then
- CLASS=org.apache.nutch.crawl.LinkDb
-elif [ "$COMMAND" = "index" ] ; then
- CLASS=org.apache.nutch.indexer.Indexer
-elif [ "$COMMAND" = "dedup" ] ; then
- CLASS=org.apache.nutch.indexer.DeleteDuplicates
-elif [ "$COMMAND" = "merge" ] ; then
- CLASS=org.apache.nutch.indexer.IndexMerger
-elif [ "$COMMAND" = "plugin" ] ; then
- CLASS=org.apache.nutch.plugin.PluginRepository
-elif [ "$COMMAND" = "server" ] ; then
- CLASS='org.apache.nutch.searcher.DistributedSearch$Server'
-elif [ "$COMMAND" = "namenode" ] ; then
- CLASS='org.apache.nutch.ndfs.NameNode'
+if [ "$COMMAND" = "namenode" ] ; then
+ CLASS='org.apache.hadoop.dfs.NameNode'
elif [ "$COMMAND" = "datanode" ] ; then
- CLASS='org.apache.nutch.ndfs.DataNode'
-elif [ "$COMMAND" = "ndfs" ] ; then
- CLASS=org.apache.nutch.fs.NDFSShell
+ CLASS='org.apache.hadoop.dfs.DataNode'
+elif [ "$COMMAND" = "dfs" ] ; then
+ CLASS=org.apache.hadoop.fs.DFSShell
elif [ "$COMMAND" = "jobtracker" ] ; then
- CLASS=org.apache.nutch.mapred.JobTracker
+ CLASS=org.apache.hadoop.mapred.JobTracker
elif [ "$COMMAND" = "tasktracker" ] ; then
- CLASS=org.apache.nutch.mapred.TaskTracker
+ CLASS=org.apache.hadoop.mapred.TaskTracker
elif [ "$COMMAND" = "job" ] ; then
- CLASS=org.apache.nutch.mapred.JobClient
+ CLASS=org.apache.hadoop.mapred.JobClient
else
CLASS=$COMMAND
fi
@@ -177,5 +132,4 @@
fi
# run it
-exec "$JAVA" $JAVA_HEAP_MAX $NUTCH_OPTS -classpath "$CLASSPATH" $CLASS "$@"
-
+exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"
Copied: lucene/hadoop/trunk/bin/hadoop-daemon.sh (from r374710, lucene/nutch/trunk/bin/nutch-daemon.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop-daemon.sh?p2=lucene/hadoop/trunk/bin/hadoop-daemon.sh&p1=lucene/nutch/trunk/bin/nutch-daemon.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch-daemon.sh (original)
+++ lucene/hadoop/trunk/bin/hadoop-daemon.sh Fri Feb 3 11:45:32 2006
@@ -1,16 +1,16 @@
#!/bin/bash
#
-# Runs a Nutch command as a daemon.
+# Runs a Hadoop command as a daemon.
#
# Environment Variables
#
-# NUTCH_LOG_DIR Where log files are stored. PWD by default.
-# NUTCH_MASTER host:path where nutch code should be rsync'd from
-# NUTCH_PID_DIR The pid files are stored. /tmp by default.
-# NUTCH_IDENT_STRING A string representing this instance of nutch. $USER by default
+# HADOOP_LOG_DIR Where log files are stored. PWD by default.
+# HADOOP_MASTER host:path where hadoop code should be rsync'd from
+# HADOOP_PID_DIR Where the pid files are stored. /tmp by default.
+# HADOOP_IDENT_STRING A string representing this instance of hadoop. $USER by default
##
-usage="Usage: nutch-daemon [start|stop] [nutch-command] [args...]"
+usage="Usage: hadoop-daemon [start|stop] [hadoop-command] [args...]"
# if no args specified, show usage
if [ $# -le 1 ]; then
@@ -37,21 +37,21 @@
done
# get log directory
-if [ "$NUTCH_LOG_DIR" = "" ]; then
- NUTCH_LOG_DIR=$PWD
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+ HADOOP_LOG_DIR=$PWD
fi
-if [ "$NUTCH_PID_DIR" = "" ]; then
- NUTCH_PID_DIR=/tmp
+if [ "$HADOOP_PID_DIR" = "" ]; then
+ HADOOP_PID_DIR=/tmp
fi
-if [ "$NUTCH_IDENT_STRING" = "" ]; then
- NUTCH_IDENT_STRING=$USER
+if [ "$HADOOP_IDENT_STRING" = "" ]; then
+ HADOOP_IDENT_STRING=$USER
fi
# some variables
-log=$NUTCH_LOG_DIR/nutch-$NUTCH_IDENT_STRING-$command-`hostname`.log
-pid=$NUTCH_PID_DIR/nutch-$NUTCH_IDENT_STRING-$command.pid
+log=$HADOOP_LOG_DIR/hadoop-$HADOOP_IDENT_STRING-$command-`hostname`.log
+pid=$HADOOP_PID_DIR/hadoop-$HADOOP_IDENT_STRING-$command.pid
case $startStop in
@@ -65,14 +65,14 @@
fi
root=`dirname $this`/..
- if [ "$NUTCH_MASTER" != "" ]; then
- echo rsync from $NUTCH_MASTER
- rsync -a --delete --exclude=.svn $NUTCH_MASTER/ $root
+ if [ "$HADOOP_MASTER" != "" ]; then
+ echo rsync from $HADOOP_MASTER
+ rsync -a --delete --exclude=.svn $HADOOP_MASTER/ $root
fi
cd $root
echo starting $command, logging to $log
- nohup bin/nutch $command "$@" >& $log < /dev/null &
+ nohup bin/hadoop $command "$@" >& $log < /dev/null &
echo $! > $pid
sleep 1; head $log
;;
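The daemon wrapper does little more than rsync from HADOOP_MASTER when set, then nohup bin/hadoop with output to a log file and the pid recorded. A sketch of using it, with illustrative directories (the defaults are $PWD and /tmp per the variables above):

    export HADOOP_LOG_DIR=/var/log/hadoop    # illustrative
    export HADOOP_PID_DIR=/var/run/hadoop    # illustrative
    bin/hadoop-daemon.sh start namenode
    # log: $HADOOP_LOG_DIR/hadoop-$USER-namenode-<hostname>.log
    # pid: $HADOOP_PID_DIR/hadoop-$USER-namenode.pid
    bin/hadoop-daemon.sh stop namenode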
Copied: lucene/hadoop/trunk/bin/hadoop-daemons.sh (from r374710, lucene/nutch/trunk/bin/nutch-daemons.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/hadoop-daemons.sh?p2=lucene/hadoop/trunk/bin/hadoop-daemons.sh&p1=lucene/nutch/trunk/bin/nutch-daemons.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch-daemons.sh (original)
+++ lucene/hadoop/trunk/bin/hadoop-daemons.sh Fri Feb 3 11:45:32 2006
@@ -1,8 +1,8 @@
#!/bin/bash
#
-# Run a Nutch command on all slave hosts.
+# Run a Hadoop command on all slave hosts.
-usage="Usage: nutch-daemons.sh [start|stop] command args..."
+usage="Usage: hadoop-daemons.sh [start|stop] command args..."
# if no args specified, show usage
if [ $# -le 1 ]; then
@@ -13,4 +13,4 @@
bin=`dirname $0`
bin=`cd $bin; pwd`
-exec $bin/slaves.sh $bin/nutch-daemon.sh "$@"
+exec $bin/slaves.sh $bin/hadoop-daemon.sh "$@"
Copied: lucene/hadoop/trunk/bin/slaves.sh (from r374710, lucene/nutch/trunk/bin/slaves.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/slaves.sh?p2=lucene/hadoop/trunk/bin/slaves.sh&p1=lucene/nutch/trunk/bin/slaves.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/slaves.sh (original)
+++ lucene/hadoop/trunk/bin/slaves.sh Fri Feb 3 11:45:32 2006
@@ -4,7 +4,7 @@
#
# Environment Variables
#
-# NUTCH_SLAVES File naming remote hosts. Default is ~/.slaves
+# HADOOP_SLAVES File naming remote hosts. Default is ~/.slaves
##
usage="Usage: slaves.sh command..."
@@ -15,11 +15,11 @@
exit 1
fi
-if [ "$NUTCH_SLAVES" = "" ]; then
- export NUTCH_SLAVES=$HOME/.slaves
+if [ "$HADOOP_SLAVES" = "" ]; then
+ export HADOOP_SLAVES=$HOME/.slaves
fi
-for slave in `cat $NUTCH_SLAVES`; do
+for slave in `cat $HADOOP_SLAVES`; do
ssh -o ConnectTimeout=1 $slave "$@" \
2>&1 | sed "s/^/$slave: /" &
done
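slaves.sh fans its arguments out over ssh to every host named in $HADOOP_SLAVES (default ~/.slaves), tagging each output line with the slave's name; hadoop-daemons.sh builds on it. A sketch with hypothetical host names:

    cat > ~/.slaves <<EOF
    node1.example.com
    node2.example.com
    EOF
    bin/slaves.sh uptime                  # each line comes back prefixed "node1.example.com: "
    bin/hadoop-daemons.sh start datanode  # runs hadoop-daemon.sh start datanode on every slave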
Copied: lucene/hadoop/trunk/bin/start-all.sh (from r374710, lucene/nutch/trunk/bin/start-all.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/start-all.sh?p2=lucene/hadoop/trunk/bin/start-all.sh&p1=lucene/nutch/trunk/bin/start-all.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/start-all.sh (original)
+++ lucene/hadoop/trunk/bin/start-all.sh Fri Feb 3 11:45:32 2006
@@ -1,11 +1,11 @@
#!/bin/bash
-# Start all nutch daemons. Run this on master node.
+# Start all hadoop daemons. Run this on master node.
bin=`dirname $0`
bin=`cd $bin; pwd`
-$bin/nutch-daemons.sh start datanode
-$bin/nutch-daemon.sh start namenode
-$bin/nutch-daemon.sh start jobtracker
-$bin/nutch-daemons.sh start tasktracker
+$bin/hadoop-daemons.sh start datanode
+$bin/hadoop-daemon.sh start namenode
+$bin/hadoop-daemon.sh start jobtracker
+$bin/hadoop-daemons.sh start tasktracker
Copied: lucene/hadoop/trunk/bin/stop-all.sh (from r374710, lucene/nutch/trunk/bin/stop-all.sh)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/stop-all.sh?p2=lucene/hadoop/trunk/bin/stop-all.sh&p1=lucene/nutch/trunk/bin/stop-all.sh&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/stop-all.sh (original)
+++ lucene/hadoop/trunk/bin/stop-all.sh Fri Feb 3 11:45:32 2006
@@ -1,11 +1,11 @@
#!/bin/bash
-# Stop all nutch daemons. Run this on master node.
+# Stop all hadoop daemons. Run this on master node.
bin=`dirname $0`
bin=`cd $bin; pwd`
-$bin/nutch-daemon.sh stop jobtracker
-$bin/nutch-daemons.sh stop tasktracker
-$bin/nutch-daemon.sh stop namenode
-$bin/nutch-daemons.sh stop datanode
+$bin/hadoop-daemon.sh stop jobtracker
+$bin/hadoop-daemons.sh stop tasktracker
+$bin/hadoop-daemon.sh stop namenode
+$bin/hadoop-daemons.sh stop datanode
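Together the two wrappers drive a whole cluster from the master: start-all.sh brings up datanodes on the slaves, then the namenode, jobtracker and tasktrackers, and stop-all.sh tears them down in the reverse order. A sketch of the cycle, assuming ~/.slaves lists the slave hosts:

    bin/start-all.sh   # datanodes (slaves), namenode, jobtracker, tasktrackers (slaves)
    # ... submit MapReduce work, e.g. via bin/hadoop job ...
    bin/stop-all.sh    # jobtracker, tasktrackers, namenode, datanodes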
Copied: lucene/hadoop/trunk/build.xml (from r374710, lucene/nutch/trunk/build.xml)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/build.xml?p2=lucene/hadoop/trunk/build.xml&p1=lucene/nutch/trunk/build.xml&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Fri Feb 3 11:45:32 2006
@@ -1,13 +1,52 @@
<?xml version="1.0"?>
-<project name="Nutch" default="compile">
+<project name="Hadoop" default="compile">
<!-- Load all the default properties, and any the user wants -->
<!-- to contribute (without having to type -D or edit this file -->
<property file="${user.home}/build.properties" />
<property file="${basedir}/build.properties" />
- <property file="${basedir}/default.properties" />
+ <property name="Name" value="Hadoop"/>
+ <property name="name" value="hadoop"/>
+ <property name="version" value="0.1-dev"/>
+ <property name="final.name" value="${name}-${version}"/>
+ <property name="year" value="2006"/>
+
+ <property name="basedir" value="./"/>
+ <property name="src.dir" value="src/java"/>
+ <property name="lib.dir" value="lib"/>
+ <property name="conf.dir" value="conf"/>
+ <property name="docs.dir" value="docs"/>
+ <property name="docs.src" value="${basedir}/src/web"/>
+
+ <property name="build.dir" value="build"/>
+ <property name="build.classes" value="${build.dir}/classes"/>
+ <property name="build.webapps" value="${build.dir}/webapps"/>
+ <property name="build.docs" value="${build.dir}/docs"/>
+ <property name="build.javadoc" value="${build.docs}/api"/>
+ <property name="build.encoding" value="ISO-8859-1"/>
+
+ <property name="test.src.dir" value="src/test"/>
+ <property name="test.build.dir" value="${build.dir}/test"/>
+ <property name="test.build.data" value=" ${test.build.dir}/data"/>
+ <property name="test.build.classes" value="${test.build.dir}/classes"/>
+ <property name="test.build.javadoc" value="${test.build.dir}/docs/api"/>
+
+ <property name="web.src.dir" value="src/web"/>
+ <property name="src.webapps" value="src/webapps"/>
+
+ <property name="javadoc.link.java"
+ value="http://java.sun.com/j2se/1.4.2/docs/api/"/>
+ <property name="javadoc.packages" value="org.apache.hadoop.*"/>
+
+ <property name="dist.dir" value="${build.dir}/${final.name}"/>
+
+ <property name="javac.debug" value="on"/>
+ <property name="javac.optimize" value="on"/>
+ <property name="javac.deprecation" value="off"/>
+ <property name="javac.version" value="1.4"/>
+
<!-- the normal classpath -->
<path id="classpath">
<pathelement location="${build.classes}"/>
@@ -17,21 +56,13 @@
</path>
<!-- the unit test classpath -->
- <dirname property="plugins.classpath.dir" file="${build.plugins}"/>
<path id="test.classpath">
<pathelement location="${test.build.classes}" />
<pathelement location="${conf.dir}"/>
<pathelement location="${test.src.dir}"/>
- <pathelement location="${plugins.classpath.dir}"/>
<path refid="classpath"/>
</path>
- <!-- xmlcatalog definition for xslt task -->
- <xmlcatalog id="docDTDs">
- <dtd publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"
- location="${xmlcatalog.dir}/xhtml1-transitional.dtd"/>
- </xmlcatalog>
-
<!-- ====================================================== -->
<!-- Stuff needed by all targets -->
<!-- ====================================================== -->
@@ -55,20 +86,16 @@
<fileset dir="${conf.dir}" includes="**/*.template"/>
<mapper type="glob" from="*.template" to="*"/>
</copy>
-
-
</target>
<!-- ====================================================== -->
<!-- Compile the Java files -->
<!-- ====================================================== -->
- <target name="compile" depends="compile-core, compile-plugins"/>
-
- <target name="compile-core" depends="init">
+ <target name="compile" depends="init">
<javac
encoding="${build.encoding}"
srcdir="${src.dir}"
- includes="org/apache/nutch/**/*.java"
+ includes="org/apache/hadoop/**/*.java"
destdir="${build.classes}"
debug="${javac.debug}"
optimize="${javac.optimize}"
@@ -79,32 +106,13 @@
</javac>
</target>
- <target name="compile-plugins">
- <ant dir="src/plugin" target="deploy" inheritAll="false"/>
- </target>
-
- <target name="generate-src" depends="init">
- <javacc target="${src.dir}/org/apache/nutch/analysis/NutchAnalysis.jj"
- javacchome="${javacc.home}">
- </javacc>
-
- <fixcrlf srcdir="${src.dir}" eol="lf" includes="**/*.java"/>
-
- </target>
-
- <target name="dynamic" depends="generate-src, compile">
- </target>
-
-
<!-- ================================================================== -->
- <!-- Make nutch.jar -->
+ <!-- Make hadoop.jar -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
- <target name="jar" depends="compile-core">
- <copy file="${conf.dir}/nutch-default.xml"
- todir="${build.classes}"/>
- <copy file="${conf.dir}/nutch-site.xml"
+ <target name="jar" depends="compile">
+ <copy file="${conf.dir}/hadoop-default.xml"
todir="${build.classes}"/>
<jar jarfile="${build.dir}/${final.name}.jar"
basedir="${build.classes}">
@@ -114,44 +122,13 @@
</target>
<!-- ================================================================== -->
- <!-- Make nutch.war -->
- <!-- ================================================================== -->
- <!-- -->
- <!-- ================================================================== -->
- <target name="war" depends="jar,compile,generate-docs">
- <war destfile="${build.dir}/${final.name}.war"
- webxml="${web.src.dir}/web.xml">
- <fileset dir="${web.src.dir}/jsp"/>
- <zipfileset dir="${docs.src}" includes="include/*.html"/>
- <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
- <fileset dir="${docs.dir}"/>
- <lib dir="${lib.dir}">
- <include name="lucene*.jar"/>
- <include name="taglibs-*.jar"/>
- <include name="dom4j-*.jar"/>
- <include name="xerces-*.jar"/>
- </lib>
- <lib dir="${build.dir}">
- <include name="${final.name}.jar"/>
- </lib>
- <classes dir="${conf.dir}" excludes="**/*.template"/>
- <classes dir="${web.src.dir}/locale"/>
- <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>
- <webinf dir="${lib.dir}">
- <include name="taglibs-*.tld"/>
- </webinf>
- </war>
- </target>
-
-
- <!-- ================================================================== -->
<!-- Compile test code -->
<!-- ================================================================== -->
- <target name="compile-core-test" depends="compile-core">
+ <target name="compile-test" depends="compile">
<javac
encoding="${build.encoding}"
srcdir="${test.src.dir}"
- includes="org/apache/nutch/**/*.java"
+ includes="org/apache/hadoop/**/*.java"
destdir="${test.build.classes}"
debug="${javac.debug}"
optimize="${javac.optimize}"
@@ -165,14 +142,12 @@
<!-- ================================================================== -->
<!-- Run unit tests -->
<!-- ================================================================== -->
- <target name="test" depends="test-core, test-plugins"/>
-
- <target name="test-core" depends="compile, compile-core-test">
+ <target name="test" depends="compile, compile-test">
<delete dir="${test.build.data}"/>
<mkdir dir="${test.build.data}"/>
- <copy file="${test.src.dir}/nutch-site.xml"
+ <copy file="${test.src.dir}/hadoop-site.xml"
todir="${test.build.classes}"/>
<junit printsummary="yes" haltonfailure="no" fork="yes" dir="${basedir}"
@@ -194,10 +169,6 @@
</target>
- <target name="test-plugins" depends="compile">
- <ant dir="src/plugin" target="test" inheritAll="false"/>
- </target>
-
<target name="nightly" depends="test, tar">
</target>
@@ -217,147 +188,14 @@
bottom="Copyright &copy; ${year} The Apache Software Foundation"
>
<packageset dir="${src.dir}"/>
- <packageset dir="${plugins.dir}/protocol-file/src/java"/>
- <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
- <packageset dir="${plugins.dir}/protocol-http/src/java"/>
- <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
- <packageset dir="${plugins.dir}/parse-html/src/java"/>
- <packageset dir="${plugins.dir}/parse-js/src/java"/>
- <packageset dir="${plugins.dir}/parse-text/src/java"/>
- <packageset dir="${plugins.dir}/parse-pdf/src/java"/>
-<!-- <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues-->
-<!-- <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues-->
- <packageset dir="${plugins.dir}/parse-msword/src/java"/>
- <packageset dir="${plugins.dir}/index-basic/src/java"/>
- <packageset dir="${plugins.dir}/index-more/src/java"/>
- <packageset dir="${plugins.dir}/query-more/src/java"/>
- <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
- <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
- <packageset dir="${plugins.dir}/creativecommons/src/java"/>
- <packageset dir="${plugins.dir}/languageidentifier/src/java"/>
- <packageset dir="${plugins.dir}/clustering-carrot2/src/java"/>
- <packageset dir="${plugins.dir}/ontology/src/java"/>
<link href="${javadoc.link.java}"/>
- <link href="${javadoc.link.lucene}"/>
<classpath refid="classpath"/>
- <classpath>
- <fileset dir="${plugins.dir}" >
- <include name="**/*.jar"/>
- </fileset>
- </classpath>
- <group title="Core" packages="org.apache.nutch.*"/>
- <group title="Plugins" packages="${plugins.packages}"/>
</javadoc>
</target>
<target name="default-doc">
<style basedir="${conf.dir}" destdir="${docs.dir}"
- includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
- </target>
-
- <target name="generate-locale" if="doc.locale">
- <echo message="Generating docs for locale=${doc.locale}"/>
-
- <mkdir dir="${build.docs}/${doc.locale}/include"/>
- <xslt in="${docs.src}/include/${doc.locale}/header.xml"
- out="${build.docs}/${doc.locale}/include/header.html"
- style="${docs.src}/style/nutch-header.xsl">
- <xmlcatalog refid="docDTDs"/>
- </xslt>
-
- <dependset>
- <srcfileset dir="${docs.src}/include/${doc.locale}" includes="*.xml"/>
- <srcfileset dir="${docs.src}/style" includes="*.xsl"/>
- <targetfileset dir="${docs.dir}/${doc.locale}" includes="*.html"/>
- </dependset>
-
- <copy file="${docs.src}/style/nutch-page.xsl"
- todir="${build.docs}/${doc.locale}"
- preservelastmodified="true"/>
-
- <xslt basedir="${docs.src}/pages/${doc.locale}"
- destdir="${docs.dir}/${doc.locale}"
- includes="*.xml"
- style="${build.docs}/${doc.locale}/nutch-page.xsl">
- <xmlcatalog refid="docDTDs"/>
- </xslt>
- </target>
-
-
- <target name="generate-docs" depends="init">
- <dependset>
- <srcfileset dir="${docs.src}/include" includes="*.html"/>
- <targetfileset dir="${docs.dir}" includes="**/*.html"/>
- </dependset>
-
- <mkdir dir="${build.docs}/include"/>
- <copy todir="${build.docs}/include">
- <fileset dir="${docs.src}/include"/>
- </copy>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="ca"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="de"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="en"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="es"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="fi"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="fr"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="hu"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="jp"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="ms"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="nl"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="pl"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="pt"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="sv"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="th"/>
- </antcall>
-
- <antcall target="generate-locale">
- <param name="doc.locale" value="zh"/>
- </antcall>
-
- <fixcrlf srcdir="${docs.dir}" eol="lf" encoding="utf-8"
- includes="**/*.html"/>
-
+ includes="hadoop-default.xml" style="conf/hadoop-conf.xsl"/>
</target>
<!-- ================================================================== -->
@@ -365,13 +203,12 @@
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
- <target name="package" depends="jar, war, javadoc">
+ <target name="package" depends="jar, javadoc">
<mkdir dir="${dist.dir}"/>
<mkdir dir="${dist.dir}/lib"/>
<mkdir dir="${dist.dir}/bin"/>
<mkdir dir="${dist.dir}/docs"/>
<mkdir dir="${dist.dir}/docs/api"/>
- <mkdir dir="${dist.dir}/plugins"/>
<copy todir="${dist.dir}/lib" includeEmptyDirs="false">
<fileset dir="lib"/>
@@ -381,14 +218,8 @@
<fileset dir="${build.webapps}"/>
</copy>
- <copy todir="${dist.dir}/plugins">
- <fileset dir="${build.plugins}"/>
- </copy>
-
<copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
- <copy file="${build.dir}/${final.name}.war" todir="${dist.dir}"/>
-
<copy todir="${dist.dir}/bin">
<fileset dir="bin"/>
</copy>
@@ -400,10 +231,6 @@
<chmod perm="ugo+x" type="file">
<fileset dir="${dist.dir}/bin"/>
</chmod>
-
- <copy todir="${dist.dir}/docs">
- <fileset dir="${docs.dir}"/>
- </copy>
<copy todir="${dist.dir}/docs/api">
<fileset dir="${build.javadoc}"/>
Copied: lucene/hadoop/trunk/conf/hadoop-default.xml (from r374710, lucene/nutch/trunk/conf/nutch-default.xml)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/conf/hadoop-default.xml?p2=lucene/hadoop/trunk/conf/hadoop-default.xml&p1=lucene/nutch/trunk/conf/nutch-default.xml&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/hadoop/trunk/conf/hadoop-default.xml Fri Feb 3 11:45:32 2006
@@ -7,110 +7,7 @@
<nutch-conf>
-<!-- HTTP properties -->
-
-<property>
- <name>http.agent.name</name>
- <value>NutchCVS</value>
- <description>Our HTTP 'User-Agent' request header.</description>
-</property>
-
-<property>
- <name>http.robots.agents</name>
- <value>NutchCVS,Nutch,*</value>
- <description>The agent strings we'll look for in robots.txt files,
- comma-separated, in decreasing order of precedence.</description>
-</property>
-
-<property>
- <name>http.robots.403.allow</name>
- <value>true</value>
- <description>Some servers return HTTP status 403 (Forbidden) if
- /robots.txt doesn't exist. This should probably mean that we are
- allowed to crawl the site nonetheless. If this is set to false,
- then such sites will be treated as forbidden.</description>
-</property>
-
-<property>
- <name>http.agent.description</name>
- <value>Nutch</value>
- <description>Further description of our bot- this text is used in
- the User-Agent header. It appears in parenthesis after the agent name.
- </description>
-</property>
-
-<property>
- <name>http.agent.url</name>
- <value>http://lucene.apache.org/nutch/bot.html</value>
- <description>A URL to advertise in the User-Agent header. This will
- appear in parentheses after the agent name.
- </description>
-</property>
-
-<property>
- <name>http.agent.email</name>
- <value>nutch-agent@lucene.apache.org</value>
- <description>An email address to advertise in the HTTP 'From' request
- header and User-Agent header.</description>
-</property>
-
-<property>
- <name>http.agent.version</name>
- <value>0.8-dev</value>
- <description>A version string to advertise in the User-Agent
- header.</description>
-</property>
-
-<property>
- <name>http.timeout</name>
- <value>10000</value>
- <description>The default network timeout, in milliseconds.</description>
-</property>
-
-<property>
- <name>http.max.delays</name>
- <value>100</value>
- <description>The number of times a thread will delay when trying to
- fetch a page. Each time it finds that a host is busy, it will wait
- fetcher.server.delay. After http.max.delays attempts, it will give
- up on the page for now.</description>
-</property>
-
-<property>
- <name>http.content.limit</name>
- <value>65536</value>
- <description>The length limit for downloaded content, in bytes.
- If this value is nonnegative (>=0), content longer than it will be truncated;
- otherwise, no truncation at all.
- </description>
-</property>
-
-<property>
- <name>http.proxy.host</name>
- <value></value>
- <description>The proxy hostname. If empty, no proxy is used.</description>
-</property>
-
-<property>
- <name>http.proxy.port</name>
- <value></value>
- <description>The proxy port.</description>
-</property>
-
-<property>
- <name>http.verbose</name>
- <value>false</value>
- <description>If true, HTTP will log more verbosely.</description>
-</property>
-
-<property>
- <name>http.redirect.max</name>
- <value>3</value>
- <description>The maximum number of redirects the fetcher will follow when
- trying to fetch a page.</description>
-</property>
-
-<!-- FILE properties -->
+<!-- file properties -->
<property>
<name>file.content.limit</name>
@@ -132,212 +29,6 @@
</description>
</property>
-<!-- FTP properties -->
-
-<property>
- <name>ftp.username</name>
- <value>anonymous</value>
- <description>ftp login username.</description>
-</property>
-
-<property>
- <name>ftp.password</name>
- <value>anonymous@example.com</value>
- <description>ftp login password.</description>
-</property>
-
-<property>
- <name>ftp.content.limit</name>
- <value>65536</value>
- <description>The length limit for downloaded content, in bytes.
- If this value is larger than zero, content longer than it is truncated;
- otherwise (zero or negative), no truncation at all. Caution: classical
- ftp RFCs never define partial transfer and, in fact, some ftp servers
- out there do not handle client side forced close-down very well.
- Our implementation tries its best to handle such situations smoothly.
- </description>
-</property>
-
-<property>
- <name>ftp.timeout</name>
- <value>60000</value>
- <description>Default timeout for ftp client socket, in millisec.
- Please also see ftp.keep.connection below.</description>
-</property>
-
-<property>
- <name>ftp.server.timeout</name>
- <value>100000</value>
- <description>An estimation of ftp server idle time, in millisec.
- Typically it is 120000 millisec for many ftp servers out there.
- Better be conservative here. Together with ftp.timeout, it is used to
- decide if we need to delete (annihilate) current ftp.client instance and
- force to start another ftp.client instance anew. This is necessary because
- a fetcher thread may not be able to obtain next request from queue in time
- (due to idleness) before our ftp client times out or remote server
- disconnects. Used only when ftp.keep.connection is true (please see below).
- </description>
-</property>
-
-<property>
- <name>ftp.keep.connection</name>
- <value>false</value>
- <description>Whether to keep ftp connection. Useful if crawling same host
- again and again. When set to true, it avoids connection, login and dir list
- parser setup for subsequent urls. If it is set to true, however, you must
- make sure (roughly):
- (1) ftp.timeout is less than ftp.server.timeout
- (2) ftp.timeout is larger than (fetcher.threads.fetch * fetcher.server.delay)
- Otherwise there will be too many "delete client because idled too long"
- messages in thread logs.</description>
-</property>
-
-<property>
- <name>ftp.follow.talk</name>
- <value>false</value>
- <description>Whether to log dialogue between our client and remote
- server. Useful for debugging.</description>
-</property>
-
-<!-- web db properties -->
-
-<property>
- <name>db.default.fetch.interval</name>
- <value>30</value>
- <description>The default number of days between re-fetches of a page.
- </description>
-</property>
-
-<property>
- <name>db.ignore.internal.links</name>
- <value>true</value>
- <description>If true, when adding new links to a page, links from
- the same host are ignored. This is an effective way to limit the
- size of the link database, keeping only the highest quality
- links.
- </description>
-</property>
-
-<property>
- <name>db.score.injected</name>
- <value>1.0</value>
- <description>The score of new pages added by the injector.
- </description>
-</property>
-
-<property>
- <name>db.score.link.external</name>
- <value>1.0</value>
- <description>The score factor for new pages added due to a link from
- another host relative to the referencing page's score.
- </description>
-</property>
-
-<property>
- <name>db.score.link.internal</name>
- <value>1.0</value>
- <description>The score factor for pages added due to a link from the
- same host, relative to the referencing page's score.
- </description>
-</property>
-
-<property>
- <name>db.max.outlinks.per.page</name>
- <value>100</value>
- <description>The maximum number of outlinks that we'll process for a page.
- </description>
-</property>
-
-<property>
- <name>db.max.anchor.length</name>
- <value>100</value>
- <description>The maximum number of characters permitted in an anchor.
- </description>
-</property>
-
-<property>
- <name>db.fetch.retry.max</name>
- <value>3</value>
- <description>The maximum number of times a url that has encountered
- recoverable errors is generated for fetch.</description>
-</property>
-
-<property>
- <name>db.signature.class</name>
- <value>org.apache.nutch.crawl.MD5Signature</value>
- <description>The default implementation of a page signature. Signatures
- created with this implementation will be used for duplicate detection
- and removal.</description>
-</property>
-
-<property>
- <name>db.signature.text_profile.min_token_len</name>
- <value>2</value>
- <description>Minimum token length to be included in the signature.
- </description>
-</property>
-
-<property>
- <name>db.signature.text_profile.quant_rate</name>
- <value>0.01</value>
- <description>Profile frequencies will be rounded down to a multiple of
- QUANT = (int)(QUANT_RATE * maxFreq), where maxFreq is a maximum token
- frequency. If maxFreq > 1 then QUANT will be at least 2, which means that
- for longer texts tokens with frequency 1 will always be discarded.
- </description>
-</property>
-
-<!-- generate properties -->
-
-<property>
- <name>generate.max.per.host</name>
- <value>-1</value>
- <description>The maximum number of urls per host in a single
- fetchlist. -1 if unlimited.</description>
-</property>
-
-<!-- fetcher properties -->
-
-<property>
- <name>fetcher.server.delay</name>
- <value>5.0</value>
- <description>The number of seconds the fetcher will delay between
- successive requests to the same server.</description>
-</property>
-
-<property>
- <name>fetcher.threads.fetch</name>
- <value>10</value>
- <description>The number of FetcherThreads the fetcher should use.
- This also determines the maximum number of requests that are
- made at once (each FetcherThread handles one connection).</description>
-</property>
-
-<property>
- <name>fetcher.threads.per.host</name>
- <value>1</value>
- <description>This number is the maximum number of threads that
- should be allowed to access a host at one time.</description>
-</property>
-
-<property>
- <name>fetcher.verbose</name>
- <value>false</value>
- <description>If true, fetcher will log more verbosely.</description>
-</property>
-
-<property>
- <name>fetcher.parse</name>
- <value>true</value>
- <description>If true, fetcher will parse content.</description>
-</property>
-
-<property>
- <name>fetcher.store.content</name>
- <value>true</value>
- <description>If true, fetcher will store content.</description>
-</property>
-
<!-- i/o properties -->
<property>
@@ -393,35 +84,35 @@
<name>fs.default.name</name>
<value>local</value>
<description>The name of the default file system. Either the
- literal string "local" or a host:port for NDFS.</description>
+ literal string "local" or a host:port for DFS.</description>
</property>
<property>
- <name>ndfs.datanode.port</name>
+ <name>dfs.datanode.port</name>
<value>50010</value>
- <description>The port number that the ndfs datanode server uses as a starting
+ <description>The port number that the dfs datanode server uses as a starting
point to look for a free port to listen on.
</description>
</property>
<property>
- <name>ndfs.name.dir</name>
- <value>/tmp/nutch/ndfs/name</value>
- <description>Determines where on the local filesystem the NDFS name node
+ <name>dfs.name.dir</name>
+ <value>/tmp/nutch/dfs/name</value>
+ <description>Determines where on the local filesystem the DFS name node
should store the name table.</description>
</property>
<property>
- <name>ndfs.data.dir</name>
- <value>/tmp/nutch/ndfs/data</value>
- <description>Determines where on the local filesystem an NDFS data node
+ <name>dfs.data.dir</name>
+ <value>/tmp/nutch/dfs/data</value>
+ <description>Determines where on the local filesystem an DFS data node
should store its blocks. If this is a comma- or space-delimited
list of directories, then data will be stored in all named
directories, typically on different devices.</description>
</property>
<property>
- <name>ndfs.replication</name>
+ <name>dfs.replication</name>
<value>3</value>
<description>How many copies we try to have at all times. The actual
number of replications is at max the number of datanodes in the
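The hunk above is the heart of the rename for operators: the ndfs.* keys become dfs.*, with fs.default.name selecting either "local" or a DFS host:port. Since Configuration was copied from NutchConf, site overrides in conf/hadoop-site.xml (created from the template added in this commit) should take precedence over hadoop-default.xml. A hedged sketch of such an override file; the host, port and paths are illustrative, and the root element name follows the <nutch-conf> context line above:

    cat > conf/hadoop-site.xml <<'EOF'
    <?xml version="1.0"?>
    <nutch-conf>
    <property>
      <name>fs.default.name</name>
      <value>master.example.com:9000</value>  <!-- host:port selects DFS instead of "local" -->
    </property>
    <property>
      <name>dfs.name.dir</name>
      <value>/data/dfs/name</value>           <!-- illustrative; default is /tmp/nutch/dfs/name -->
    </property>
    <property>
      <name>dfs.data.dir</name>
      <value>/data1/dfs,/data2/dfs</value>    <!-- comma- or space-delimited list spreads blocks across devices -->
    </property>
    <property>
      <name>dfs.replication</name>
      <value>2</value>                        <!-- default is 3; capped by the number of datanodes -->
    </property>
    </nutch-conf>
    EOF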
@@ -534,198 +225,6 @@
combining them and writing to disk.</description>
</property>
-<!-- indexer properties -->
-
-<property>
- <name>indexer.score.power</name>
- <value>0.5</value>
- <description>Determines the power of link analyis scores. Each
- pages's boost is set to <i>score<sup>scorePower</sup></i> where
- <i>score</i> is its link analysis score and <i>scorePower</i> is the
- value of this parameter. This is compiled into indexes, so, when
- this is changed, pages must be re-indexed for it to take
- effect.</description>
-</property>
-
-<property>
- <name>indexer.max.title.length</name>
- <value>100</value>
- <description>The maximum number of characters of a title that are indexed.
- </description>
-</property>
-
-<property>
- <name>indexer.max.tokens</name>
- <value>10000</value>
- <description>
- The maximum number of tokens that will be indexed for a single field
- in a document. This limits the amount of memory required for
- indexing, so that collections with very large files will not crash
- the indexing process by running out of memory.
-
- Note that this effectively truncates large documents, excluding
- from the index tokens that occur further in the document. If you
- know your source documents are large, be sure to set this value
- high enough to accommodate the expected size. If you set it to
- Integer.MAX_VALUE, then the only limit is your memory, but you
- should anticipate an OutOfMemoryError.
- </description>
-</property>
-
-<property>
- <name>indexer.mergeFactor</name>
- <value>50</value>
- <description>The factor that determines the frequency of Lucene segment
- merges. This must not be less than 2; higher values increase indexing
- speed but lead to increased RAM usage, and increase the number of
- open file handles (which may lead to "Too many open files" errors).
- NOTE: the "segments" here have nothing to do with Nutch segments, they
- are a low-level data unit used by Lucene.
- </description>
-</property>
-
-<property>
- <name>indexer.minMergeDocs</name>
- <value>50</value>
- <description>This number determines the minimum number of Lucene
- Documents buffered in memory between Lucene segment merges. Larger
- values increase indexing speed and increase RAM usage.
- </description>
-</property>
-
-<property>
- <name>indexer.maxMergeDocs</name>
- <value>2147483647</value>
- <description>This number determines the maximum number of Lucene
- Documents to be merged into a new Lucene segment. Larger values
- increase batch indexing speed and reduce the number of Lucene segments,
- which reduces the number of open file handles; however, this also
- decreases incremental indexing performance.
- </description>
-</property>
-
-<property>
- <name>indexer.termIndexInterval</name>
- <value>128</value>
- <description>Determines the fraction of terms which Lucene keeps in
- RAM when searching, to facilitate random-access. Smaller values use
- more memory but make searches somewhat faster. Larger values use
- less memory but make searches somewhat slower.
- </description>
-</property>
-
-
-<!-- analysis properties -->
-
-<property>
- <name>analysis.common.terms.file</name>
- <value>common-terms.utf8</value>
- <description>The name of a file containing a list of common terms
- that should be indexed in n-grams.</description>
-</property>
-
-<!-- searcher properties -->
-
-<property>
- <name>searcher.dir</name>
- <value>crawl</value>
- <description>
- Path to root of crawl. This directory is searched (in
- order) for either the file search-servers.txt, containing a list of
- distributed search servers, or the directory "index" containing
- merged indexes, or the directory "segments" containing segment
- indexes.
- </description>
-</property>
-
-<property>
- <name>searcher.filter.cache.size</name>
- <value>16</value>
- <description>
- Maximum number of filters to cache. Filters can accelerate certain
- field-based queries, like language, document format, etc. Each
- filter requires one bit of RAM per page. So, with a 10 million page
- index, a cache size of 16 consumes two bytes per page, or 20MB.
- </description>
-</property>
-
-<property>
- <name>searcher.filter.cache.threshold</name>
- <value>0.05</value>
- <description>
- Filters are cached when their term is matched by more than this
- fraction of pages. For example, with a threshold of 0.05, and 10
- million pages, the term must match more than 1/20, or 500,000 pages.
- So, if out of 10 million pages, 50% of pages are in English, and 2%
- are in Finnish, then, with a threshold of 0.05, searches for
- "lang:en" will use a cached filter, while searches for "lang:fi"
- will score all 200,000 Finnish documents.
- </description>
-</property>
-
-<property>
- <name>searcher.hostgrouping.rawhits.factor</name>
- <value>2.0</value>
- <description>
- A factor that is used to determine the number of raw hits
- initially fetched, before host grouping is done.
- </description>
-</property>
-
-<property>
- <name>searcher.summary.context</name>
- <value>5</value>
- <description>
- The number of context terms to display preceding and following
- matching terms in a hit summary.
- </description>
-</property>
-
-<property>
- <name>searcher.summary.length</name>
- <value>20</value>
- <description>
- The total number of terms to display in a hit summary.
- </description>
-</property>
-
-<property>
- <name>searcher.max.hits</name>
- <value>-1</value>
- <description>If positive, search stops after this many hits are
- found. Setting this to small, positive values (e.g., 1000) can make
- searches much faster. With a sorted index, the quality of the hits
- suffers little.</description>
-</property>
-
-<!-- URL normalizer properties -->
-
-<property>
- <name>urlnormalizer.class</name>
- <value>org.apache.nutch.net.BasicUrlNormalizer</value>
- <description>Name of the class used to normalize URLs.</description>
-</property>
-
-<property>
- <name>urlnormalizer.regex.file</name>
- <value>regex-normalize.xml</value>
- <description>Name of the config file used by the RegexUrlNormalizer
- class.</description>
-</property>
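
Since urlnormalizer.class holds a class name, implementations are presumably loaded by reflection. The sketch below only illustrates that idea; it assumes nothing beyond the Configuration.get(name, default) accessor seen later in this commit, and actually instantiating the Nutch normalizer requires the Nutch classes on the classpath:

    import org.apache.hadoop.conf.Configuration;

    // A minimal sketch (not part of this commit) of loading the
    // configured normalizer class by reflection.
    public class NormalizerLoader {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            String className =
                conf.get("urlnormalizer.class", "org.apache.nutch.net.BasicUrlNormalizer");
            Object normalizer = Class.forName(className).newInstance();
            System.out.println("loaded " + normalizer.getClass().getName());
        }
    }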
-
-<!-- mime properties -->
-
-<property>
- <name>mime.types.file</name>
- <value>mime-types.xml</value>
- <description>Name of a file on the CLASSPATH that maps filename
- extensions and magic sequences to MIME types.</description>
-</property>
-
-<property>
- <name>mime.type.magic</name>
- <value>true</value>
- <description>Defines whether the MIME content-type detector uses magic resolution.
- </description>
-</property>
<!-- ipc properties -->
@@ -733,220 +232,6 @@
<name>ipc.client.timeout</name>
<value>60000</value>
<description>Defines the timeout for IPC calls in milliseconds.</description>
-</property>
-
-<!-- plugin properties -->
-
-<property>
- <name>plugin.folders</name>
- <value>plugins</value>
- <description>Directories where Nutch plugins are located. Each
- element may be a relative or absolute path. If absolute, it is used
- as is. If relative, it is searched for on the classpath.</description>
-</property>
-
-<property>
- <name>plugin.auto-activation</name>
- <value>true</value>
- <description>Defines whether plugins that are not activated by the
- plugin.includes and plugin.excludes properties should be automatically
- activated when they are needed by other active plugins.
- </description>
-</property>
-
-<property>
- <name>plugin.includes</name>
- <value>protocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)</value>
- <description>Regular expression naming plugin directory names to
- include. Any plugin not matching this expression is excluded.
- In any case you must at least include the nutch-extensionpoints
- plugin. By default Nutch includes crawling of just HTML and plain
- text via HTTP, plus basic indexing and search plugins.
- </description>
-</property>
-
-<property>
- <name>plugin.excludes</name>
- <value></value>
- <description>Regular expression naming plugin directory names to exclude.
- </description>
-</property>
-
-<!-- parser properties -->
-
-<property>
- <name>parse.plugin.file</name>
- <value>parse-plugins.xml</value>
- <description>The name of the file that defines the associations between
- content-types and parsers.</description>
-</property>
-
-<property>
- <name>parser.character.encoding.default</name>
- <value>windows-1252</value>
- <description>The character encoding to fall back to when no other
- information is available.</description>
-</property>
-
-<property>
- <name>parser.html.impl</name>
- <value>neko</value>
- <description>HTML Parser implementation. Currently the following keywords
- are recognized: "neko" uses NekoHTML, "tagsoup" uses TagSoup.
- </description>
-</property>
-
-<!-- urlfilter plugin properties -->
-
-<property>
- <name>urlfilter.regex.file</name>
- <value>regex-urlfilter.txt</value>
- <description>Name of a file on the CLASSPATH containing regular
- expressions used by the urlfilter-regex (RegexURLFilter) plugin.</description>
-</property>
-
-<property>
- <name>urlfilter.prefix.file</name>
- <value>prefix-urlfilter.txt</value>
- <description>Name of a file on the CLASSPATH containing URL prefixes
- used by the urlfilter-prefix (PrefixURLFilter) plugin.</description>
-</property>
-
-<property>
- <name>urlfilter.order</name>
- <value></value>
- <description>The order in which URL filters are applied.
- If empty, all available URL filters (as dictated by the
- plugin.includes and plugin.excludes properties above) are loaded
- and applied in system-defined order. If not empty, only the named
- filters are loaded and applied in the given order. For example, if
- this property has the value:
- org.apache.nutch.net.RegexURLFilter org.apache.nutch.net.PrefixURLFilter
- then RegexURLFilter is applied first, and PrefixURLFilter second.
- Since all filters are AND'ed, filter ordering has no impact on the
- end result, but it may have performance implications, depending on
- the relative cost of the filters.
- </description>
-</property>
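
To see why ordering is a performance question only, consider a sketch of an AND'ed filter chain: every filter must accept the URL, so the first rejection wins and cheap filters are best run first. The single-method URLFilter signature used here (return the URL to accept, null to reject) is assumed for illustration and is not part of this commit:

    // Sketch only: AND'ed URL filters where order affects cost, not outcome.
    public class FilterChain {
        interface URLFilter {
            String filter(String urlString);  // null means "reject"
        }

        static String applyAll(URLFilter[] filters, String url) {
            for (int i = 0; i < filters.length; i++) {
                url = filters[i].filter(url);
                if (url == null) {
                    return null;  // rejected; later filters never run
                }
            }
            return url;
        }

        public static void main(String[] args) {
            URLFilter noQueries = new URLFilter() {
                public String filter(String u) {
                    return u.indexOf('?') >= 0 ? null : u;  // drop query URLs
                }
            };
            System.out.println(applyAll(new URLFilter[] { noQueries },
                                        "http://example.com/index.html"));
        }
    }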
-
-<!-- clustering extension properties -->
-
-<property>
- <name>extension.clustering.hits-to-cluster</name>
- <value>100</value>
- <description>Number of snippets retrieved for the clustering extension
- if the clustering extension is available and the user requested that
- results be clustered.</description>
-</property>
-
-<property>
- <name>extension.clustering.extension-name</name>
- <value></value>
- <description>Use the specified online clustering extension. If empty,
- the first available extension will be used. The "name" here refers to an 'id'
- attribute of the 'implementation' element in the plugin descriptor XML
- file.</description>
-</property>
-
-<!-- ontology extension properties -->
-
-<property>
- <name>extension.ontology.extension-name</name>
- <value></value>
- <description>Use the specified online ontology extension. If empty,
- the first available extension will be used. The "name" here refers to an 'id'
- attribute of the 'implementation' element in the plugin descriptor XML
- file.</description>
-</property>
-
-<property>
- <name>extension.ontology.urls</name>
- <value>
- </value>
- <description>URLs of OWL files, separated by spaces, such as
- http://www.example.com/ontology/time.owl
- http://www.example.com/ontology/space.owl
- http://www.example.com/ontology/wine.owl
- Or
- file:/ontology/time.owl
- file:/ontology/space.owl
- file:/ontology/wine.owl
- Make sure each URL is valid.
- By default, no OWL file is configured, so query refinement based on
- ontology is silently ignored.
- </description>
-</property>
-
-<!-- query-basic plugin properties -->
-
-<property>
- <name>query.url.boost</name>
- <value>4.0</value>
- <description> Used as a boost for the url field in the Lucene query.
- </description>
-</property>
-
-<property>
- <name>query.anchor.boost</name>
- <value>2.0</value>
- <description> Used as a boost for the anchor field in the Lucene query.
- </description>
-</property>
-
-
-<property>
- <name>query.title.boost</name>
- <value>1.5</value>
- <description> Used as a boost for the title field in the Lucene query.
- </description>
-</property>
-
-<property>
- <name>query.host.boost</name>
- <value>2.0</value>
- <description> Used as a boost for the host field in the Lucene query.
- </description>
-</property>
-
-<property>
- <name>query.phrase.boost</name>
- <value>1.0</value>
- <description> Used as a boost for phrase matches in the Lucene query.
- Multiplied by the boost of the field the phrase is matched in; for
- example, a phrase matched in the title field gets 1.0 * 1.5 = 1.5.
- </description>
-</property>
-
-<!-- language-identifier plugin properties -->
-
-<property>
- <name>lang.ngram.min.length</name>
- <value>1</value>
- <description> The minimum size of n-grams used to identify the
- language (must be between 1 and lang.ngram.max.length).
- The larger the range between lang.ngram.min.length and
- lang.ngram.max.length, the better the identification, but the
- slower it is.
- </description>
-</property>
-
-<property>
- <name>lang.ngram.max.length</name>
- <value>4</value>
- <description> The maximum size of n-grams used to identify the
- language (must be between lang.ngram.min.length and 4).
- The larger the range between lang.ngram.min.length and
- lang.ngram.max.length, the better the identification, but the
- slower it is.
- </description>
-</property>
-
-<property>
- <name>lang.analyze.max.length</name>
- <value>2048</value>
- <description> The maximum number of bytes of data used to identify
- the language (0 means full content analysis).
- The larger this value, the better the analysis, but the slower
- it is.
- </description>
</property>
</nutch-conf>
Copied: lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java (from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java?p2=lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configurable.java Fri Feb 3 11:45:32 2006
@@ -14,14 +14,14 @@
* limitations under the License.
*/
-package org.apache.nutch.util;
+package org.apache.hadoop.conf;
-/** Something that may be configured with a {@link NutchConf}. */
-public interface NutchConfigurable {
+/** Something that may be configured with a {@link Configuration}. */
+public interface Configurable {
/** Set the configuration to be used by this object. */
- void setConf(NutchConf conf);
+ void setConf(Configuration conf);
/** Return the configuration used by this object. */
- NutchConf getConf();
+ Configuration getConf();
}
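
The renamed interface is small enough to use directly; a minimal, illustrative implementation (not part of this commit; class name and property choice are hypothetical) might look like:

    import org.apache.hadoop.conf.Configurable;
    import org.apache.hadoop.conf.Configuration;

    // Sketch: a class that receives its Configuration through the
    // Configurable contract shown above.
    public class TimeoutHolder implements Configurable {
        private Configuration conf;
        private int timeout;

        public void setConf(Configuration conf) {
            this.conf = conf;
            this.timeout = conf.getInt("ipc.client.timeout", 60000);
        }

        public Configuration getConf() {
            return conf;
        }

        public int getTimeout() {
            return timeout;
        }
    }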
Copied: lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java (from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java?p2=lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configuration.java Fri Feb 3 11:45:32 2006
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-package org.apache.nutch.util;
+package org.apache.hadoop.conf;
import java.util.*;
import java.net.URL;
@@ -23,14 +23,13 @@
import javax.xml.parsers.*;
-import org.apache.nutch.plugin.PluginRepository;
import org.w3c.dom.*;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
-/** Provides access to Nutch configuration parameters.
+/** Provides access to configuration parameters.
* <p>An ordered list of configuration parameter files with
* default and always-overrides site parameters.
* <p>Default values for all parameters are specified in a file named
@@ -42,25 +41,23 @@
* or write) after {@link #addConfResource(String)} or
* {@link #addConfResource(File)}.
*/
-public class NutchConf {
+public class Configuration {
private static final Logger LOG =
- LogFormatter.getLogger("org.apache.nutch.util.NutchConf");
+ LogFormatter.getLogger("org.apache.hadoop.conf.Configuration");
private ArrayList resourceNames = new ArrayList();
private Properties properties;
private ClassLoader classLoader =
Thread.currentThread().getContextClassLoader();
- private PluginRepository pluginRepository;
-
/** A new configuration. */
- public NutchConf() {
+ public Configuration() {
resourceNames.add("nutch-default.xml");
resourceNames.add("nutch-site.xml");
}
/** A new configuration with the same settings cloned from another. */
- public NutchConf(NutchConf other) {
+ public Configuration(Configuration other) {
this.resourceNames = (ArrayList)other.resourceNames.clone();
if (other.properties != null)
this.properties = (Properties)other.properties.clone();
@@ -89,16 +86,6 @@
}
/**
- * @return a cached instance of the plugin repository
- */
- public PluginRepository getPluginRepository() {
- if (this.pluginRepository == null) {
- this.pluginRepository = new PluginRepository(this);
- }
- return this.pluginRepository;
- }
-
- /**
* Returns the value of the <code>name</code> property, or null if no such
* property exists.
*/
@@ -434,7 +421,7 @@
public String toString() {
StringBuffer sb = new StringBuffer(resourceNames.size()*30);
- sb.append("NutchConf: ");
+ sb.append("Configuration: ");
ListIterator i = resourceNames.listIterator();
while (i.hasNext()) {
if (i.nextIndex() != 0) {
@@ -452,7 +439,7 @@
/** For debugging. List non-default properties to the terminal and exit. */
public static void main(String[] args) throws Exception {
- new NutchConf().write(System.out);
+ new Configuration().write(System.out);
}
}
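
A minimal usage sketch, relying only on methods visible in this diff (the no-argument constructor, addConfResource, getInt, and write); the extra resource name is hypothetical:

    import org.apache.hadoop.conf.Configuration;

    // Sketch: load defaults, layer an extra resource, read a value,
    // and dump the non-default properties for debugging.
    public class ConfDemo {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration(); // nutch-default.xml, nutch-site.xml
            conf.addConfResource("my-site.xml");      // hypothetical extra resource
            int timeout = conf.getInt("ipc.client.timeout", 60000);
            System.out.println("ipc.client.timeout = " + timeout);
            conf.write(System.out);                   // list non-default properties
        }
    }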
Copied: lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java (from r374710, lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java)
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java?p2=lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java&p1=lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java&r1=374710&r2=374733&rev=374733&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/conf/Configured.java Fri Feb 3 11:45:32 2006
@@ -14,25 +14,25 @@
* limitations under the License.
*/
-package org.apache.nutch.util;
+package org.apache.hadoop.conf;
-/** Base class for things that may be configured with a {@link NutchConf}. */
-public class NutchConfigured implements NutchConfigurable {
+/** Base class for things that may be configured with a {@link Configuration}. */
+public class Configured implements Configurable {
- private NutchConf conf;
+ private Configuration conf;
- /** Construct a NutchConfigured. */
- public NutchConfigured(NutchConf conf) {
+ /** Construct a Configured. */
+ public Configured(Configuration conf) {
setConf(conf);
}
// inherit javadoc
- public void setConf(NutchConf conf) {
+ public void setConf(Configuration conf) {
this.conf = conf;
}
// inherit javadoc
- public NutchConf getConf() {
+ public Configuration getConf() {
return conf;
}
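
Extending the renamed base class spares each component the setConf/getConf boilerplate; a sketch (class name hypothetical, not part of this commit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;

    // Sketch: Configured stores the Configuration so subclasses only
    // add their own logic.
    public class BlockMover extends Configured {
        public BlockMover(Configuration conf) {
            super(conf);
        }

        public int bufferSize() {
            return getConf().getInt("io.file.buffer.size", 4096);
        }
    }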
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/Block.java Fri Feb 3 11:45:32 2006
@@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
import java.io.*;
import java.util.*;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/BlockCommand.java Fri Feb 3 11:45:32 2006
@@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
import java.io.*;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Fri Feb 3 11:45:32 2006
@@ -13,13 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
import java.io.*;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
/**********************************************************************
- * Protocol that an NDFS client uses to communicate with the NameNode.
+ * Protocol that a DFS client uses to communicate with the NameNode.
* It's used to manipulate the namespace, and obtain datanode info.
*
* @author Mike Cafarella
@@ -126,7 +126,7 @@
/**
* Get a listing of the indicated directory
*/
- public NDFSFileInfo[] getListing(String src) throws IOException;
+ public DFSFileInfo[] getListing(String src) throws IOException;
/**
* Get a set of statistics about the filesystem.
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DF.java Fri Feb 3 11:45:32 2006
@@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
import java.io.File;
import java.io.IOException;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DataNode.java Fri Feb 3 11:45:32 2006
@@ -13,11 +13,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
-import org.apache.nutch.io.*;
-import org.apache.nutch.ipc.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.ipc.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
import java.io.*;
import java.net.*;
@@ -36,12 +37,12 @@
* @author Mike Cafarella
**********************************************************/
public class DataNode implements FSConstants, Runnable {
- public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.ndfs.DataNode");
+ public static final Logger LOG = LogFormatter.getLogger("org.apache.hadoop.dfs.DataNode");
//
// REMIND - mjc - I might bring "maxgigs" back so user can place
// artificial limit on space
//private static final long GIGABYTE = 1024 * 1024 * 1024;
- //private static long numGigs = NutchConf.get().getLong("ndfs.datanode.maxgigs", 100);
+ //private static long numGigs = Configuration.get().getLong("dfs.datanode.maxgigs", 100);
//
/**
@@ -70,12 +71,12 @@
Daemon dataXceiveServer = null;
long blockReportInterval;
private long datanodeStartupPeriod;
- private NutchConf fConf;
+ private Configuration fConf;
/**
* Create given a configuration and a dataDir.
*/
- public DataNode(NutchConf conf, String datadir) throws IOException {
+ public DataNode(Configuration conf, String datadir) throws IOException {
this(InetAddress.getLocalHost().getHostName(),
new File(datadir),
createSocketAddr(conf.get("fs.default.name", "local")), conf);
@@ -84,12 +85,12 @@
/**
* Needs a directory to find its data (and config info)
*/
- public DataNode(String machineName, File datadir, InetSocketAddress nameNodeAddr, NutchConf conf) throws IOException {
+ public DataNode(String machineName, File datadir, InetSocketAddress nameNodeAddr, Configuration conf) throws IOException {
this.namenode = (DatanodeProtocol) RPC.getProxy(DatanodeProtocol.class, nameNodeAddr, conf);
this.data = new FSDataset(datadir, conf);
ServerSocket ss = null;
- int tmpPort = conf.getInt("ndfs.datanode.port", 50010);
+ int tmpPort = conf.getInt("dfs.datanode.port", 50010);
while (ss == null) {
try {
ss = new ServerSocket(tmpPort);
@@ -104,11 +105,11 @@
this.dataXceiveServer.start();
long blockReportIntervalBasis =
- conf.getLong("ndfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
+ conf.getLong("dfs.blockreport.intervalMsec", BLOCKREPORT_INTERVAL);
this.blockReportInterval =
blockReportIntervalBasis - new Random().nextInt((int)(blockReportIntervalBasis/10));
this.datanodeStartupPeriod =
- conf.getLong("ndfs.datanode.startupMsec", DATANODE_STARTUP_PERIOD);
+ conf.getLong("dfs.datanode.startupMsec", DATANODE_STARTUP_PERIOD);
}
/**
@@ -676,10 +677,10 @@
/** Start datanode daemons.
* Start a datanode daemon for each comma separated data directory
- * specified in property ndfs.data.dir
+ * specified in property dfs.data.dir
*/
- public static void run(NutchConf conf) throws IOException {
- String[] dataDirs = conf.getStrings("ndfs.data.dir");
+ public static void run(Configuration conf) throws IOException {
+ String[] dataDirs = conf.getStrings("dfs.data.dir");
subThreadList = new Vector(dataDirs.length);
for (int i = 0; i < dataDirs.length; i++) {
DataNode dn = makeInstanceForDir(dataDirs[i], conf);
@@ -694,10 +695,10 @@
/** Start datanode daemons.
* Start a datanode daemon for each comma separated data directory
- * specified in property ndfs.data.dir and wait for them to finish.
+ * specified in property dfs.data.dir and wait for them to finish.
* If this thread is specifically interrupted, it will stop waiting.
*/
- private static void runAndWait(NutchConf conf) throws IOException {
+ private static void runAndWait(Configuration conf) throws IOException {
run(conf);
// Wait for sub threads to exit
@@ -718,12 +719,12 @@
* Make an instance of DataNode after ensuring that given data directory
* (and parent directories, if necessary) can be created.
* @param dataDir where the new DataNode instance should keep its files.
- * @param conf NutchConf instance to use.
+ * @param conf Configuration instance to use.
* @return DataNode instance for given data dir and conf, or null if directory
* cannot be created.
* @throws IOException
*/
- static DataNode makeInstanceForDir(String dataDir, NutchConf conf) throws IOException {
+ static DataNode makeInstanceForDir(String dataDir, Configuration conf) throws IOException {
DataNode dn = null;
File data = new File(dataDir);
data.mkdirs();
@@ -748,6 +749,6 @@
*/
public static void main(String args[]) throws IOException {
LogFormatter.setShowThreadIDs(true);
- runAndWait(new NutchConf());
+ runAndWait(new Configuration());
}
}
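
Putting the renamed pieces together, datanode daemons can be launched from a configuration via the public run() entry point shown above; a sketch, assuming dfs.data.dir has been set (e.g. in nutch-site.xml) to a comma-separated list of directories:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.dfs.DataNode;

    // Sketch: start one datanode daemon per configured data directory.
    public class StartDataNodes {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            String[] dirs = conf.getStrings("dfs.data.dir"); // assumed to be set
            System.out.println("starting " + dirs.length + " datanode(s)");
            DataNode.run(conf); // one daemon per directory
        }
    }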
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeInfo.java Fri Feb 3 11:45:32 2006
@@ -13,10 +13,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
-import org.apache.nutch.io.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
import java.io.*;
import java.util.*;
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DatanodeProtocol.java Fri Feb 3 11:45:32 2006
@@ -14,13 +14,13 @@
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
import java.io.*;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
/**********************************************************************
- * Protocol that an NDFS datanode uses to communicate with the NameNode.
+ * Protocol that a DFS datanode uses to communicate with the NameNode.
* It's used to upload current load information and block records.
*
* @author Michael Cafarella
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSConstants.java Fri Feb 3 11:45:32 2006
@@ -13,9 +13,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
/************************************
* Some handy constants
@@ -107,8 +107,8 @@
public static long LEASE_PERIOD = 60 * 1000;
public static int READ_TIMEOUT = 60 * 1000;
- //TODO mb@media-style.com: should be nutchConf injected?
- public static final int BUFFER_SIZE = new NutchConf().getInt("io.file.buffer.size", 4096);
+ //TODO mb@media-style.com: should be conf injected?
+ public static final int BUFFER_SIZE = new Configuration().getInt("io.file.buffer.size", 4096);
}
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java?rev=374733&r1=374710&r2=374733&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDataset.java Fri Feb 3 11:45:32 2006
@@ -13,13 +13,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.ndfs;
+package org.apache.hadoop.dfs;
import java.io.*;
import java.util.*;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
/**************************************************
* FSDataset manages a set of data blocks. Each block
@@ -191,7 +191,7 @@
/**
* An FSDataset has a directory where it loads its data files.
*/
- public FSDataset(File dir, NutchConf nutchConf) throws IOException {
+ public FSDataset(File dir, Configuration conf) throws IOException {
this.dirpath = dir.getCanonicalPath();
this.data = new File(dir, "data");
if (! data.exists()) {
@@ -199,7 +199,7 @@
}
this.tmp = new File(dir, "tmp");
if (tmp.exists()) {
- FileUtil.fullyDelete(tmp, nutchConf);
+ FileUtil.fullyDelete(tmp, conf);
}
this.tmp.mkdirs();
this.dirTree = new FSDir(data);