Posted to commits@nutch.apache.org by cu...@apache.org on 2006/02/04 01:39:32 UTC

svn commit: r374796 [1/5] - in /lucene/nutch/trunk: bin/ conf/ lib/ lib/jetty-ext/ src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/clustering/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/f...

Author: cutting
Date: Fri Feb  3 16:38:32 2006
New Revision: 374796

URL: http://svn.apache.org/viewcvs?rev=374796&view=rev
Log:
NUTCH-193: MapReduce and NDFS code moved to new project, Hadoop.  See bug report for details.
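
For reference, the substitution applied throughout this change is mechanical: the org.apache.nutch io/fs/ipc/mapred/ndfs packages and NutchConf become their org.apache.hadoop counterparts, and "new NutchConf()" becomes "NutchConfiguration.create()". A minimal before/after sketch (the class name is illustrative, not part of the commit):

    // Before: Nutch-internal utility classes.
    //   import org.apache.nutch.util.NutchConf;
    //   import org.apache.nutch.fs.NutchFileSystem;
    //   NutchConf conf = new NutchConf();
    //   NutchFileSystem fs = NutchFileSystem.get(conf);

    // After: Hadoop classes plus a small Nutch-side factory.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.nutch.util.NutchConfiguration;

    public class MigrationSketch {
      public static void main(String[] args) throws Exception {
        // NutchConfiguration.create() returns a Hadoop Configuration with the
        // Nutch resources (nutch-default.xml, nutch-site.xml) layered on top.
        Configuration conf = NutchConfiguration.create();
        FileSystem fs = FileSystem.get(conf);  // "local" or a DFS, per fs.default.name
        System.out.println(conf.get("fs.default.name"));
      }
    }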

Added:
    lucene/nutch/trunk/conf/hadoop-default.xml
    lucene/nutch/trunk/conf/mapred-default.xml.template
    lucene/nutch/trunk/lib/hadoop-0.1-dev.jar   (with props)
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/FsDirectory.java
      - copied, changed from r374762, lucene/nutch/trunk/src/java/org/apache/nutch/indexer/NdfsDirectory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
    lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java
Removed:
    lucene/nutch/trunk/bin/nutch-daemon.sh
    lucene/nutch/trunk/bin/nutch-daemons.sh
    lucene/nutch/trunk/bin/slaves.sh
    lucene/nutch/trunk/bin/start-all.sh
    lucene/nutch/trunk/bin/stop-all.sh
    lucene/nutch/trunk/lib/jetty-5.1.4.LICENSE.txt
    lucene/nutch/trunk/lib/jetty-5.1.4.jar
    lucene/nutch/trunk/lib/jetty-ext/
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/NdfsDirectory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/io/
    lucene/nutch/trunk/src/java/org/apache/nutch/ipc/
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/
    lucene/nutch/trunk/src/java/org/apache/nutch/util/Daemon.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/LogFormatter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigurable.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfigured.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/Progress.java
    lucene/nutch/trunk/src/test/org/apache/nutch/fs/
    lucene/nutch/trunk/src/test/org/apache/nutch/io/
    lucene/nutch/trunk/src/test/org/apache/nutch/ipc/
    lucene/nutch/trunk/src/test/org/apache/nutch/mapred/
    lucene/nutch/trunk/src/test/org/apache/nutch/ndfs/
    lucene/nutch/trunk/src/webapps/
Modified:
    lucene/nutch/trunk/bin/nutch
    lucene/nutch/trunk/conf/crawl-tool.xml
    lucene/nutch/trunk/conf/nutch-default.xml
    lucene/nutch/trunk/conf/nutch-site.xml.template
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlink.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlinks.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Signature.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/BasicUrlNormalizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseImpl.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseStatus.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parser.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/Plugin.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ContentProperties.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Protocol.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolStatus.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FieldQueryFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hit.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/HitDetails.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LinkDbInlinks.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Query.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
    lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
    lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java
    lucene/nutch/trunk/src/plugin/build.xml
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
    lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
    lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestLanguageIdentifier.java
    lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
    lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
    lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OntologyImpl.java
    lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OwlParser.java
    lucene/nutch/trunk/src/plugin/ontology/src/test/org/apache/nutch/ontology/TestOntology.java
    lucene/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
    lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
    lucene/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java
    lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
    lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
    lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/ContentReaderListener.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PPTExtractor.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/PropertiesReaderListener.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
    lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
    lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
    lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
    lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
    lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
    lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java
    lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
    lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
    lucene/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
    lucene/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
    lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
    lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
    lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
    lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
    lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
    lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
    lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
    lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
    lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
    lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/TypeQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-site/src/java/org/apache/nutch/searcher/site/SiteQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-url/src/java/org/apache/nutch/searcher/url/URLQueryFilter.java
    lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
    lucene/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java
    lucene/nutch/trunk/src/test/nutch-site.xml
    lucene/nutch/trunk/src/test/org/apache/nutch/analysis/TestQueryParser.java
    lucene/nutch/trunk/src/test/org/apache/nutch/net/TestBasicUrlNormalizer.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java
    lucene/nutch/trunk/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java
    lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContentProperties.java
    lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestHitDetails.java
    lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestQuery.java

Modified: lucene/nutch/trunk/bin/nutch
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/bin/nutch?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/bin/nutch (original)
+++ lucene/nutch/trunk/bin/nutch Fri Feb  3 16:38:32 2006
@@ -43,12 +43,6 @@
   echo "  dedup             remove duplicates from a set of segment indexes"
   echo "  plugin            load a plugin and run one of its classes main()"
   echo "  server            run a search server"
-  echo "  namenode          run the NDFS namenode"
-  echo "  datanode          run an NDFS datanode"
-  echo "  ndfs              run an NDFS admin client"
-  echo "  jobtracker        run the MapReduce job Tracker node" 
-  echo "  tasktracker       run a MapReduce task Tracker node" 
-  echo "  job               manipulate MapReduce jobs" 
   echo " or"
   echo "  CLASSNAME         run the class named CLASSNAME"
   echo "Most commands print help when invoked w/o parameters."
@@ -155,18 +149,6 @@
   CLASS=org.apache.nutch.plugin.PluginRepository
 elif [ "$COMMAND" = "server" ] ; then
   CLASS='org.apache.nutch.searcher.DistributedSearch$Server'
-elif [ "$COMMAND" = "namenode" ] ; then
-  CLASS='org.apache.nutch.ndfs.NameNode'
-elif [ "$COMMAND" = "datanode" ] ; then
-  CLASS='org.apache.nutch.ndfs.DataNode'
-elif [ "$COMMAND" = "ndfs" ] ; then
-  CLASS=org.apache.nutch.fs.NDFSShell
-elif [ "$COMMAND" = "jobtracker" ] ; then
-  CLASS=org.apache.nutch.mapred.JobTracker
-elif [ "$COMMAND" = "tasktracker" ] ; then
-  CLASS=org.apache.nutch.mapred.TaskTracker
-elif [ "$COMMAND" = "job" ] ; then
-  CLASS=org.apache.nutch.mapred.JobClient
 else
   CLASS=$COMMAND
 fi

Modified: lucene/nutch/trunk/conf/crawl-tool.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/crawl-tool.xml?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/crawl-tool.xml (original)
+++ lucene/nutch/trunk/conf/crawl-tool.xml Fri Feb  3 16:38:32 2006
@@ -1,5 +1,5 @@
 <?xml version="1.0" ?> 
-<?xml:stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+<?xml:stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <!-- Overridden defaults for intranet use. -->
 
@@ -7,7 +7,7 @@
 <!-- wish to modify from this file into nutch-site.xml and change them -->
 <!-- there.  If nutch-site.xml does not already exist, create it.      -->
 
-<nutch-conf>
+<configuration>
 
 <property>
   <name>urlfilter.regex.file</name>
@@ -40,4 +40,4 @@
   each.</description>
 </property>
 
-</nutch-conf>
+</configuration>

Added: lucene/nutch/trunk/conf/hadoop-default.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/hadoop-default.xml?rev=374796&view=auto
==============================================================================
--- lucene/nutch/trunk/conf/hadoop-default.xml (added)
+++ lucene/nutch/trunk/conf/hadoop-default.xml Fri Feb  3 16:38:32 2006
@@ -0,0 +1,237 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- Do not modify this file directly.  Instead, copy entries that you -->
+<!-- wish to modify from this file into hadoop-site.xml and change them -->
+<!-- there.  If hadoop-site.xml does not already exist, create it.      -->
+
+<configuration>
+
+<!-- file properties -->
+
+<property>
+  <name>file.content.limit</name>
+  <value>65536</value>
+  <description>The length limit for downloaded content, in bytes.
+  If this value is larger than zero, content longer than it will be
+  truncated; otherwise (zero or negative), no truncation at all.
+  </description>
+</property>
+
+<property>
+  <name>file.content.ignored</name>
+  <value>true</value>
+  <description>If true, no file content will be saved during fetch.
+  This is probably what we want most of the time, since file:// URLs
+  are meant to be local and we can always use them directly at the parsing
+  and indexing stages. Otherwise file contents will be saved.
+  !! NOT IMPLEMENTED YET !!
+  </description>
+</property>
+
+<!-- i/o properties -->
+
+<property>
+  <name>io.sort.factor</name>
+  <value>10</value>
+  <description>The number of streams to merge at once while sorting
+  files.  This determines the number of open file handles.</description>
+</property>
+
+<property>
+  <name>io.sort.mb</name>
+  <value>100</value>
+  <description>The total amount of buffer memory to use while sorting 
+  files, in megabytes.  By default, gives each merge stream 1MB, which
+  should minimize seeks.</description>
+</property>
+
+<property>
+  <name>io.file.buffer.size</name>
+  <value>4096</value>
+  <description>The size of buffer for use in sequence files.
+  The size of this buffer should probably be a multiple of hardware
+  page size (4096 on Intel x86), and it determines how much data is
+  buffered during read and write operations.</description>
+</property>
+  
+<property>
+  <name>io.bytes.per.checksum</name>
+  <value>512</value>
+  <description>The number of bytes per checksum.  Must not be larger than
+  io.file.buffer.size.</description>
+</property>
+
+<property>
+  <name>io.skip.checksum.errors</name>
+  <value>false</value>
+  <description>If true, when a checksum error is encountered while
+  reading a sequence file, entries are skipped, instead of throwing an
+  exception.</description>
+</property>
+  
+<property>
+  <name>io.map.index.skip</name>
+  <value>0</value>
+  <description>Number of index entries to skip between each entry.
+  Zero by default. Setting this to values larger than zero can
+  facilitate opening large map files using less memory.</description>
+</property>
+
+<!-- file system properties -->
+
+<property>
+  <name>fs.default.name</name>
+  <value>local</value>
+  <description>The name of the default file system.  Either the
+  literal string "local" or a host:port for DFS.</description>
+</property>
+
+<property>
+  <name>dfs.datanode.port</name>
+  <value>50010</value>
+  <description>The port number that the dfs datanode server uses as a starting 
+	       point to look for a free port to listen on.
+</description>
+</property>
+
+<property>
+  <name>dfs.name.dir</name>
+  <value>/tmp/hadoop/dfs/name</value>
+  <description>Determines where on the local filesystem the DFS name node
+      should store the name table.</description>
+</property>
+
+<property>
+  <name>dfs.data.dir</name>
+  <value>/tmp/hadoop/dfs/data</value>
+  <description>Determines where on the local filesystem a DFS data node
+  should store its blocks.  If this is a comma- or space-delimited
+  list of directories, then data will be stored in all named
+  directories, typically on different devices.</description>
+</property>
+
+<property>
+  <name>dfs.replication</name>
+  <value>3</value>
+  <description>How many copies we try to have at all times. The actual
+  number of replications is at max the number of datanodes in the
+  cluster.</description>
+</property>
+
+<!-- map/reduce properties -->
+
+<property>
+  <name>mapred.job.tracker</name>
+  <value>local</value>
+  <description>The host and port that the MapReduce job tracker runs
+  at.  If "local", then jobs are run in-process as a single map
+  and reduce task.
+  </description>
+</property>
+
+<property>
+  <name>mapred.job.tracker.info.port</name>
+  <value>50030</value>
+  <description>The port that the MapReduce job tracker info webserver runs at.
+  </description>
+</property>
+
+<property>
+  <name>mapred.task.tracker.output.port</name>
+  <value>50040</value>
+  <description>The port number that the MapReduce task tracker output server uses as a starting
+               point to look for a free port to listen on.
+  </description>
+</property>
+
+<property>
+  <name>mapred.task.tracker.report.port</name>
+  <value>50050</value>
+  <description>The port number that the MapReduce task tracker report server uses as a starting
+               point to look for a free port to listen on.
+  </description>
+</property>
+
+<property>
+  <name>mapred.local.dir</name>
+  <value>/tmp/hadoop/mapred/local</value>
+  <description>The local directory where MapReduce stores intermediate
+  data files.  May be a space- or comma- separated list of
+  directories on different devices in order to spread disk i/o.
+  </description>
+</property>
+
+<property>
+  <name>mapred.system.dir</name>
+  <value>/tmp/hadoop/mapred/system</value>
+  <description>The shared directory where MapReduce stores control files.
+  </description>
+</property>
+
+<property>
+  <name>mapred.temp.dir</name>
+  <value>/tmp/hadoop/mapred/temp</value>
+  <description>A shared directory for temporary files.
+  </description>
+</property>
+
+<property>
+  <name>mapred.map.tasks</name>
+  <value>2</value>
+  <description>The default number of map tasks per job.  Typically set
+  to a prime several times greater than number of available hosts.
+  Ignored when mapred.job.tracker is "local".  
+  </description>
+</property>
+
+<property>
+  <name>mapred.reduce.tasks</name>
+  <value>1</value>
+  <description>The default number of reduce tasks per job.  Typically set
+  to a prime close to the number of available hosts.  Ignored when
+  mapred.job.tracker is "local".
+  </description>
+</property>
+
+<property>
+  <name>mapred.task.timeout</name>
+  <value>600000</value>
+  <description>The number of milliseconds before a task will be
+  terminated if it neither reads an input, writes an output, nor
+  updates its status string.
+  </description>
+</property>
+
+<property>
+  <name>mapred.tasktracker.tasks.maximum</name>
+  <value>2</value>
+  <description>The maximum number of tasks that will be run
+  simultaneously by a task tracker.
+  </description>
+</property>
+
+<property>
+  <name>mapred.child.heap.size</name>
+  <value>200m</value>
+  <description>The heap size (-Xmx) that will be used for task tracker
+  child processes.</description>
+</property>
+
+<property>
+  <name>mapred.combine.buffer.size</name>
+  <value>100000</value>
+  <description>The number of entries the combining collector caches before
+  combining them and writing to disk.</description>
+</property>
+
+
+<!-- ipc properties -->
+
+<property>
+  <name>ipc.client.timeout</name>
+  <value>60000</value>
+  <description>Defines the timeout for IPC calls in milliseconds.</description>
+</property>
+
+</configuration>
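
These defaults are read back through the typed Configuration accessors, as the Java changes later in this message do (e.g. job.getInt("fetcher.threads.fetch", 10) in Crawl.java). A short sketch, assuming hadoop-default.xml is on the classpath and that the default Configuration constructor loads it (class name illustrative):

    import org.apache.hadoop.conf.Configuration;

    public class ConfigReadSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();  // assumed to load hadoop-default.xml
        // Each accessor falls back to the supplied default when the property is
        // absent; the fallbacks below mirror the values defined above.
        int sortFactor = conf.getInt("io.sort.factor", 10);        // merge streams per sort pass
        int bufferSize = conf.getInt("io.file.buffer.size", 4096); // sequence-file buffer, bytes
        String tracker = conf.get("mapred.job.tracker", "local");  // "local" = in-process map+reduce
        System.out.println(sortFactor + " " + bufferSize + " " + tracker);
      }
    }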

Added: lucene/nutch/trunk/conf/mapred-default.xml.template
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/mapred-default.xml.template?rev=374796&view=auto
==============================================================================
--- lucene/nutch/trunk/conf/mapred-default.xml.template (added)
+++ lucene/nutch/trunk/conf/mapred-default.xml.template Fri Feb  3 16:38:32 2006
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+
+<!-- Put mapred-specific property overrides in this file. -->
+
+<configuration>
+
+</configuration>

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Fri Feb  3 16:38:32 2006
@@ -1,11 +1,11 @@
 <?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <!-- Do not modify this file directly.  Instead, copy entries that you -->
 <!-- wish to modify from this file into nutch-site.xml and change them -->
 <!-- there.  If nutch-site.xml does not already exist, create it.      -->
 
-<nutch-conf>
+<configuration>
 
 <!-- HTTP properties -->
 
@@ -110,28 +110,6 @@
     trying to fetch a page.</description>
 </property>
 
-<!-- FILE properties -->
-
-<property>
-  <name>file.content.limit</name>
-  <value>65536</value>
-  <description>The length limit for downloaded content, in bytes.
-  If this value is larger than zero, content longer than it will be
-  truncated; otherwise (zero or negative), no truncation at all.
-  </description>
-</property>
-
-<property>
-  <name>file.content.ignored</name>
-  <value>true</value>
-  <description>If true, no file content will be saved during fetch.
-  This is probably what we want most of the time, since file:// URLs
-  are meant to be local and we can always use them directly at the parsing
-  and indexing stages. Otherwise file contents will be saved.
-  !! NOT IMPLEMENTED YET !!
-  </description>
-</property>
-
 <!-- FTP properties -->
 
 <property>
@@ -338,202 +316,6 @@
   <description>If true, fetcher will store content.</description>
 </property>
 
-<!-- i/o properties -->
-
-<property>
-  <name>io.sort.factor</name>
-  <value>10</value>
-  <description>The number of streams to merge at once while sorting
-  files.  This determines the number of open file handles.</description>
-</property>
-
-<property>
-  <name>io.sort.mb</name>
-  <value>100</value>
-  <description>The total amount of buffer memory to use while sorting 
-  files, in megabytes.  By default, gives each merge stream 1MB, which
-  should minimize seeks.</description>
-</property>
-
-<property>
-  <name>io.file.buffer.size</name>
-  <value>4096</value>
-  <description>The size of buffer for use in sequence files.
-  The size of this buffer should probably be a multiple of hardware
-  page size (4096 on Intel x86), and it determines how much data is
-  buffered during read and write operations.</description>
-</property>
-  
-<property>
-  <name>io.bytes.per.checksum</name>
-  <value>512</value>
-  <description>The number of bytes per checksum.  Must not be larger than
-  io.file.buffer.size.</description>
-</property>
-
-<property>
-  <name>io.skip.checksum.errors</name>
-  <value>false</value>
-  <description>If true, when a checksum error is encountered while
-  reading a sequence file, entries are skipped, instead of throwing an
-  exception.</description>
-</property>
-  
-<property>
-  <name>io.map.index.skip</name>
-  <value>0</value>
-  <description>Number of index entries to skip between each entry.
-  Zero by default. Setting this to values larger than zero can
-  facilitate opening large map files using less memory.</description>
-</property>
-
-<!-- file system properties -->
-
-<property>
-  <name>fs.default.name</name>
-  <value>local</value>
-  <description>The name of the default file system.  Either the
-  literal string "local" or a host:port for NDFS.</description>
-</property>
-
-<property>
-  <name>ndfs.datanode.port</name>
-  <value>50010</value>
-  <description>The port number that the ndfs datanode server uses as a starting 
-	       point to look for a free port to listen on.
-</description>
-</property>
-
-<property>
-  <name>ndfs.name.dir</name>
-  <value>/tmp/nutch/ndfs/name</value>
-  <description>Determines where on the local filesystem the NDFS name node
-      should store the name table.</description>
-</property>
-
-<property>
-  <name>ndfs.data.dir</name>
-  <value>/tmp/nutch/ndfs/data</value>
-  <description>Determines where on the local filesystem an NDFS data node
-  should store its blocks.  If this is a comma- or space-delimited
-  list of directories, then data will be stored in all named
-  directories, typically on different devices.</description>
-</property>
-
-<property>
-  <name>ndfs.replication</name>
-  <value>3</value>
-  <description>How many copies we try to have at all times. The actual
-  number of replications is at max the number of datanodes in the
-  cluster.</description>
-</property>
-
-<!-- map/reduce properties -->
-
-<property>
-  <name>mapred.job.tracker</name>
-  <value>local</value>
-  <description>The host and port that the MapReduce job tracker runs
-  at.  If "local", then jobs are run in-process as a single map
-  and reduce task.
-  </description>
-</property>
-
-<property>
-  <name>mapred.job.tracker.info.port</name>
-  <value>50030</value>
-  <description>The port that the MapReduce job tracker info webserver runs at.
-  </description>
-</property>
-
-<property>
-  <name>mapred.task.tracker.output.port</name>
-  <value>50040</value>
-  <description>The port number that the MapReduce task tracker output server uses as a starting
-               point to look for a free port to listen on.
-  </description>
-</property>
-
-<property>
-  <name>mapred.task.tracker.report.port</name>
-  <value>50050</value>
-  <description>The port number that the MapReduce task tracker report server uses as a starting
-               point to look for a free port to listen on.
-  </description>
-</property>
-
-<property>
-  <name>mapred.local.dir</name>
-  <value>/tmp/nutch/mapred/local</value>
-  <description>The local directory where MapReduce stores intermediate
-  data files.  May be a space- or comma- separated list of
-  directories on different devices in order to spread disk i/o.
-  </description>
-</property>
-
-<property>
-  <name>mapred.system.dir</name>
-  <value>/tmp/nutch/mapred/system</value>
-  <description>The shared directory where MapReduce stores control files.
-  </description>
-</property>
-
-<property>
-  <name>mapred.temp.dir</name>
-  <value>/tmp/nutch/mapred/temp</value>
-  <description>A shared directory for temporary files.
-  </description>
-</property>
-
-<property>
-  <name>mapred.map.tasks</name>
-  <value>2</value>
-  <description>The default number of map tasks per job.  Typically set
-  to a prime several times greater than number of available hosts.
-  Ignored when mapred.job.tracker is "local".  
-  </description>
-</property>
-
-<property>
-  <name>mapred.reduce.tasks</name>
-  <value>1</value>
-  <description>The default number of reduce tasks per job.  Typically set
-  to a prime close to the number of available hosts.  Ignored when
-  mapred.job.tracker is "local".
-  </description>
-</property>
-
-<property>
-  <name>mapred.task.timeout</name>
-  <value>600000</value>
-  <description>The number of milliseconds before a task will be
-  terminated if it neither reads an input, writes an output, nor
-  updates its status string.
-  </description>
-</property>
-
-<property>
-  <name>mapred.tasktracker.tasks.maximum</name>
-  <value>2</value>
-  <description>The maximum number of tasks that will be run
-  simultaneously by a task tracker.
-  </description>
-</property>
-
-<property>
-  <name>mapred.child.heap.size</name>
-  <value>200m</value>
-  <description>The heap size (-Xmx) that will be used for task tracker
-  child processes.</description>
-</property>
-
-<property>
-  <name>mapred.combine.buffer.size</name>
-  <value>100000</value>
-  <description>The number of entries the combining collector caches before
-  combining them and writing to disk.</description>
-</property>
-
 <!-- indexer properties -->
 
 <property>
@@ -727,14 +509,6 @@
   </description>
 </property>
 
-<!-- ipc properties -->
-
-<property>
-  <name>ipc.client.timeout</name>
-  <value>60000</value>
-  <description>Defines the timeout for IPC calls in milliseconds.</description>
-</property>
-
 <!-- plugin properties -->
 
 <property>
@@ -949,4 +723,4 @@
   </description>
 </property>
 
-</nutch-conf>
+</configuration>

Modified: lucene/nutch/trunk/conf/nutch-site.xml.template
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-site.xml.template?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-site.xml.template (original)
+++ lucene/nutch/trunk/conf/nutch-site.xml.template Fri Feb  3 16:38:32 2006
@@ -1,8 +1,8 @@
 <?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="nutch-conf.xsl"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <!-- Put site-specific property overrides in this file. -->
 
-<nutch-conf>
+<configuration>
 
-</nutch-conf>
+</configuration>

Added: lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/lib/hadoop-0.1-dev.jar?rev=374796&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/nutch/trunk/lib/hadoop-0.1-dev.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java Fri Feb  3 16:38:32 2006
@@ -22,8 +22,9 @@
 import org.apache.nutch.plugin.Extension;
 import org.apache.nutch.plugin.ExtensionPoint;
 import org.apache.nutch.plugin.PluginRuntimeException;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.nutch.plugin.PluginRepository;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
 
 /**
@@ -40,12 +41,12 @@
   private NutchAnalyzer DEFAULT_ANALYZER;
   
   private ExtensionPoint extensionPoint;
-  private NutchConf nutchConf;
+  private Configuration conf;
 
-  public AnalyzerFactory (NutchConf nutchConf) {
-      DEFAULT_ANALYZER = new NutchDocumentAnalyzer(nutchConf);
-      this.nutchConf = nutchConf;
-      this.extensionPoint = nutchConf.getPluginRepository().getExtensionPoint(NutchAnalyzer.X_POINT_ID);
+  public AnalyzerFactory (Configuration conf) {
+      DEFAULT_ANALYZER = new NutchDocumentAnalyzer(conf);
+      this.conf = conf;
+      this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(NutchAnalyzer.X_POINT_ID);
       if(this.extensionPoint == null) {
           throw new RuntimeException("x point " + NutchAnalyzer.X_POINT_ID +
           " not found.");
@@ -77,10 +78,10 @@
 
   private Extension getExtension(String lang) {
 
-    Extension extension = (Extension) this.nutchConf.getObject(lang);
+    Extension extension = (Extension) this.conf.getObject(lang);
     if (extension == null) {
       extension = findExtension(lang);
-      this.nutchConf.setObject(lang, extension);
+      this.conf.setObject(lang, extension);
     }
     return extension;
   }
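
Two idioms recur in this and the following factory changes: PluginRepository.get(conf) replaces the old nutchConf.getPluginRepository() accessor, and getObject()/setObject() cache computed values on the Configuration itself, as getExtension() does above. A generic sketch of that caching idiom (ExpensiveThing is a hypothetical stand-in for an Extension):

    import org.apache.hadoop.conf.Configuration;

    public class ConfCacheSketch {
      static class ExpensiveThing {}  // stand-in for a looked-up Extension

      static ExpensiveThing lookup(Configuration conf, String key) {
        // getObject/setObject stash arbitrary objects on the Configuration, so
        // the costly lookup runs at most once per Configuration instance.
        ExpensiveThing cached = (ExpensiveThing) conf.getObject(key);
        if (cached == null) {
          cached = new ExpensiveThing();  // stand-in for findExtension(lang)
          conf.setObject(key, cached);
        }
        return cached;
      }
    }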

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java Fri Feb  3 16:38:32 2006
@@ -24,8 +24,9 @@
 import java.util.*;
 import java.util.logging.Logger;
 
-import org.apache.nutch.util.*;
-
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.searcher.Query.*;
 
 /** Construct n-grams for frequently occurring terms and phrases while indexing.
@@ -40,10 +41,10 @@
   
   /**
    * The constructor.
-   * @param nutchConf
+   * @param conf
    */
-  public CommonGrams(NutchConf nutchConf) {
-      init(nutchConf);
+  public CommonGrams(Configuration conf) {
+      init(conf);
   }
 
   private static class Filter extends TokenFilter {
@@ -133,10 +134,10 @@
   }
 
   /** Construct using the provided config file. */
-  private void init(NutchConf nutchConf) {
+  private void init(Configuration conf) {
     try {
-      Reader reader = nutchConf.getConfResourceAsReader
-        (nutchConf.get("analysis.common.terms.file"));
+      Reader reader = conf.getConfResourceAsReader
+        (conf.get("analysis.common.terms.file"));
       BufferedReader in = new BufferedReader(reader);
       String line;
       while ((line = in.readLine()) != null) {
@@ -236,7 +237,7 @@
       text.append(' ');
     }
     TokenStream ts = new NutchDocumentTokenizer(new StringReader(text.toString()));
-    CommonGrams commonGrams = new CommonGrams(new NutchConf());
+    CommonGrams commonGrams = new CommonGrams(NutchConfiguration.create());
     ts = commonGrams.getFilter(ts, "url");
     Token token;
     while ((token = ts.next()) != null) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java Fri Feb  3 16:38:32 2006
@@ -5,8 +5,9 @@
 import org.apache.nutch.searcher.QueryFilters;
 import org.apache.nutch.searcher.Query.Clause;
 import org.apache.nutch.searcher.Query.Clause;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.lucene.analysis.StopFilter;
+import org.apache.nutch.util.NutchConfiguration;
 
 import java.io.*;
 import java.util.*;
@@ -35,12 +36,12 @@
   }
 
   /** Construct a query parser for the text in a reader. */
-  public static Query parseQuery(String queryString, NutchConf nutchConf) throws IOException {
+  public static Query parseQuery(String queryString, Configuration conf) throws IOException {
     NutchAnalysis parser =
       new NutchAnalysis(new FastCharStream(new StringReader(queryString)));
     parser.queryString = queryString;
-    parser.queryFilters = new QueryFilters(nutchConf);
-    return parser.parse(nutchConf);
+    parser.queryFilters = new QueryFilters(conf);
+    return parser.parse(conf);
   }
 
   /** For debugging. */
@@ -49,13 +50,13 @@
     while (true) {
       System.out.print("Query: ");
       String line = in.readLine();
-      System.out.println(parseQuery(line, new NutchConf()));
+      System.out.println(parseQuery(line, NutchConfiguration.create()));
     }
   }
 
 /** Parse a query. */
-  final public Query parse(NutchConf nutchConf) throws ParseException {
-  Query query = new Query(nutchConf);
+  final public Query parse(Configuration conf) throws ParseException {
+  Query query = new Query(conf);
   ArrayList terms;
   Token token;
   String field;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java Fri Feb  3 16:38:32 2006
@@ -4,7 +4,7 @@
 import org.apache.nutch.searcher.QueryFilters;
 import org.apache.nutch.searcher.Query.Clause;
 import org.apache.nutch.searcher.Query.Clause;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.lucene.analysis.StopFilter;
 import java.io.*;
 import java.util.*;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java Fri Feb  3 16:38:32 2006
@@ -24,7 +24,8 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
 
 /**
  * The analyzer used for Nutch documents. Uses the JavaCC-defined lexical
@@ -44,13 +45,13 @@
   public static final int INTER_ANCHOR_GAP = 4;
   /** Analyzer used to analyze anchors. */
   private static Analyzer ANCHOR_ANALYZER;
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   /**
    * @param conf
    */
-  public NutchDocumentAnalyzer(NutchConf conf) {
-    this.nutchConf = conf;
+  public NutchDocumentAnalyzer(Configuration conf) {
+    this.conf = conf;
     CONTENT_ANALYZER = new ContentAnalyzer(conf);
     ANCHOR_ANALYZER = new AnchorAnalyzer();
   }
@@ -59,8 +60,8 @@
   private static class ContentAnalyzer extends Analyzer {
     private CommonGrams commonGrams;
 
-    public ContentAnalyzer(NutchConf nutchConf) {
-      this.commonGrams = new CommonGrams(nutchConf);
+    public ContentAnalyzer(Configuration conf) {
+      this.commonGrams = new CommonGrams(conf);
     }
 
     /** Constructs a {@link NutchDocumentTokenizer}. */

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java Fri Feb  3 16:38:32 2006
@@ -17,9 +17,9 @@
 package org.apache.nutch.clustering;
 
 import org.apache.nutch.plugin.*;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 /**
  * A factory for retrieving {@link OnlineClusterer} extensions.
@@ -33,9 +33,9 @@
   private ExtensionPoint extensionPoint;
   private String extensionName;
 
-  public OnlineClustererFactory(NutchConf nutchConf) {
-      this.extensionPoint = nutchConf.getPluginRepository().getExtensionPoint(OnlineClusterer.X_POINT_ID);
-      this.extensionName = nutchConf.get("extension.clustering.extension-name");
+  public OnlineClustererFactory(Configuration conf) {
+      this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(OnlineClusterer.X_POINT_ID);
+      this.extensionName = conf.get("extension.clustering.extension-name");
   }
 
   /**

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Fri Feb  3 16:38:32 2006
@@ -22,13 +22,15 @@
 import java.util.logging.*;
 
 import org.apache.nutch.fetcher.Fetcher;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.mapred.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
 import org.apache.nutch.parse.ParseSegment;
 import org.apache.nutch.indexer.DeleteDuplicates;
 import org.apache.nutch.indexer.IndexMerger;
 import org.apache.nutch.indexer.Indexer;
+import org.apache.nutch.util.NutchConfiguration;
 
 public class Crawl {
   public static final Logger LOG =
@@ -48,13 +50,13 @@
       return;
     }
 
-    NutchConf nutchConf = new NutchConf();
-    nutchConf.addConfResource("crawl-tool.xml");
-    JobConf conf = new JobConf(nutchConf);
+    Configuration conf = NutchConfiguration.create();
+    conf.addAppResource("crawl-tool.xml");
+    JobConf job = new JobConf(conf);
 
     File rootUrlDir = null;
     File dir = new File("crawl-" + getDate());
-    int threads = conf.getInt("fetcher.threads.fetch", 10);
+    int threads = job.getInt("fetcher.threads.fetch", 10);
     int depth = 5;
     int topN = Integer.MAX_VALUE;
 
@@ -76,7 +78,7 @@
       }
     }
 
-    NutchFileSystem fs = NutchFileSystem.get(conf);
+    FileSystem fs = FileSystem.get(job);
     if (fs.exists(dir)) {
       throw new RuntimeException(dir + " already exists.");
     }
@@ -95,28 +97,28 @@
     File indexes = new File(dir + "/indexes");
     File index = new File(dir + "/index");
 
-    File tmpDir = conf.getLocalFile("crawl", getDate());
+    File tmpDir = job.getLocalFile("crawl", getDate());
       
     // initialize crawlDb
-    new Injector(conf).inject(crawlDb, rootUrlDir);
+    new Injector(job).inject(crawlDb, rootUrlDir);
       
     for (int i = 0; i < depth; i++) {             // generate new segment
       File segment =
-        new Generator(conf).generate(crawlDb, segments, -1,
+        new Generator(job).generate(crawlDb, segments, -1,
                                      topN, System.currentTimeMillis());
-      new Fetcher(conf).fetch(segment, threads, Fetcher.isParsing(conf));  // fetch it
-      if (!Fetcher.isParsing(conf)) {
-        new ParseSegment(conf).parse(segment);    // parse it, if needed
+      new Fetcher(job).fetch(segment, threads, Fetcher.isParsing(job));  // fetch it
+      if (!Fetcher.isParsing(job)) {
+        new ParseSegment(job).parse(segment);    // parse it, if needed
       }
-      new CrawlDb(conf).update(crawlDb, segment); // update crawldb
+      new CrawlDb(job).update(crawlDb, segment); // update crawldb
     }
       
-    new LinkDb(conf).invert(linkDb, segments); // invert links
+    new LinkDb(job).invert(linkDb, segments); // invert links
 
     // index, dedup & merge
-    new Indexer(conf).index(indexes, crawlDb, linkDb, fs.listFiles(segments));
-    new DeleteDuplicates(conf).dedup(new File[] { indexes });
-    new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir, nutchConf).merge();
+    new Indexer(job).index(indexes, crawlDb, linkDb, fs.listFiles(segments));
+    new DeleteDuplicates(job).dedup(new File[] { indexes });
+    new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir, job).merge();
 
     LOG.info("crawl finished: " + dir);
   }
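
Condensed, the rewritten Crawl.java drives the whole pipeline through Hadoop types. A sketch with the calls taken from the diff above; the class name, directory layout, and fixed depth of 5 are illustrative:

    import java.io.File;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.nutch.crawl.*;
    import org.apache.nutch.fetcher.Fetcher;
    import org.apache.nutch.indexer.*;
    import org.apache.nutch.parse.ParseSegment;
    import org.apache.nutch.util.NutchConfiguration;

    public class CrawlLoopSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        conf.addAppResource("crawl-tool.xml");          // overlay the intranet defaults
        JobConf job = new JobConf(conf);
        FileSystem fs = FileSystem.get(job);

        File crawlDb = new File("crawl/crawldb"), segments = new File("crawl/segments");
        File linkDb = new File("crawl/linkdb"), indexes = new File("crawl/indexes");
        File index = new File("crawl/index"), urls = new File("urls");
        int threads = job.getInt("fetcher.threads.fetch", 10);

        new Injector(job).inject(crawlDb, urls);        // seed the crawldb
        for (int i = 0; i < 5; i++) {                   // one fetch cycle per depth level
          File segment = new Generator(job).generate(crawlDb, segments, -1,
              Integer.MAX_VALUE, System.currentTimeMillis());
          new Fetcher(job).fetch(segment, threads, Fetcher.isParsing(job));
          if (!Fetcher.isParsing(job)) {
            new ParseSegment(job).parse(segment);       // parse separately if the fetcher didn't
          }
          new CrawlDb(job).update(crawlDb, segment);    // fold the segment back into the crawldb
        }
        new LinkDb(job).invert(linkDb, segments);       // build the inverted link database

        // Index, de-duplicate, and merge, as in Crawl.main above.
        new Indexer(job).index(indexes, crawlDb, linkDb, fs.listFiles(segments));
        new DeleteDuplicates(job).dedup(new File[] { indexes });
        new IndexMerger(fs, fs.listFiles(indexes), index,
            job.getLocalFile("crawl", "tmp"), job).merge();
      }
    }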

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDatum.java Fri Feb  3 16:38:32 2006
@@ -19,7 +19,8 @@
 import java.io.*;
 import java.util.*;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
 import org.apache.nutch.util.*;
 
 /* The crawl state of a url. */

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Fri Feb  3 16:38:32 2006
@@ -20,20 +20,23 @@
 import java.util.*;
 import java.util.logging.*;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.mapred.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
+
+import org.apache.nutch.util.NutchConfiguration;
 
 /** This class takes a flat file of URLs and adds them to the database of pages to be
  * crawled.  Useful for bootstrapping the system. */
-public class CrawlDb extends NutchConfigured {
+public class CrawlDb extends Configured {
 
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.crawl.CrawlDb");
 
   /** Construct an CrawlDb. */
-  public CrawlDb(NutchConf conf) {
+  public CrawlDb(Configuration conf) {
     super(conf);
   }
 
@@ -53,7 +56,7 @@
     LOG.info("CrawlDb update: done");
   }
 
-  public static JobConf createJob(NutchConf config, File crawlDb) {
+  public static JobConf createJob(Configuration config, File crawlDb) {
     File newCrawlDb =
       new File(crawlDb,
                Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
@@ -77,7 +80,7 @@
 
   public static void install(JobConf job, File crawlDb) throws IOException {
     File newCrawlDb = job.getOutputDir();
-    NutchFileSystem fs = new JobClient(job).getFs();
+    FileSystem fs = new JobClient(job).getFs();
     File old = new File(crawlDb, "old");
     File current = new File(crawlDb, CrawlDatum.DB_DIR_NAME);
     fs.delete(old);
@@ -87,7 +90,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    CrawlDb crawlDb = new CrawlDb(new NutchConf());
+    CrawlDb crawlDb = new CrawlDb(NutchConfiguration.create());
     
     if (args.length < 2) {
       System.err.println("Usage: <crawldb> <segment>");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Fri Feb  3 16:38:32 2006
@@ -22,27 +22,29 @@
 import java.util.TreeMap;
 import java.util.logging.Logger;
 
-import org.apache.nutch.fs.NutchFileSystem;
-import org.apache.nutch.io.LongWritable;
-import org.apache.nutch.io.MapFile;
-import org.apache.nutch.io.SequenceFile;
-import org.apache.nutch.io.UTF8;
-import org.apache.nutch.io.Writable;
-import org.apache.nutch.io.WritableComparable;
-import org.apache.nutch.mapred.JobClient;
-import org.apache.nutch.mapred.JobConf;
-import org.apache.nutch.mapred.MapFileOutputFormat;
-import org.apache.nutch.mapred.Mapper;
-import org.apache.nutch.mapred.OutputCollector;
-import org.apache.nutch.mapred.Reducer;
-import org.apache.nutch.mapred.Reporter;
-import org.apache.nutch.mapred.SequenceFileInputFormat;
-import org.apache.nutch.mapred.SequenceFileOutputFormat;
-import org.apache.nutch.mapred.TextOutputFormat;
-import org.apache.nutch.mapred.lib.HashPartitioner;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.MapFile.Reader;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapFileOutputFormat;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.hadoop.mapred.lib.HashPartitioner;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
+import org.apache.nutch.util.NutchConfiguration;
 /**
  * Read utility for the CrawlDB.
  * 
@@ -130,7 +132,7 @@
     }
   }
   
-  public void processStatJob(String crawlDb, NutchConf config) throws IOException {
+  public void processStatJob(String crawlDb, Configuration config) throws IOException {
     LOG.info("CrawlDb statistics start: " + crawlDb);
     File tmpFolder = new File(crawlDb, "stat_tmp" + System.currentTimeMillis());
 
@@ -152,7 +154,7 @@
     JobClient.runJob(job);
 
     // reading the result
-    NutchFileSystem fileSystem = NutchFileSystem.get(config);
+    FileSystem fileSystem = FileSystem.get(config);
     SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config, tmpFolder);
 
     UTF8 key = new UTF8();
@@ -201,8 +203,8 @@
 
   }
 
-  public void readUrl(String crawlDb, String url, NutchConf config) throws IOException {
-    NutchFileSystem fs = NutchFileSystem.get(config);
+  public void readUrl(String crawlDb, String url, Configuration config) throws IOException {
+    FileSystem fs = FileSystem.get(config);
     UTF8 key = new UTF8(url);
     CrawlDatum val = new CrawlDatum();
     MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new File(crawlDb, CrawlDatum.DB_DIR_NAME), config);
@@ -215,7 +217,7 @@
     }
   }
   
-  public void processDumpJob(String crawlDb, String output, NutchConf config) throws IOException {
+  public void processDumpJob(String crawlDb, String output, Configuration config) throws IOException {
 
     LOG.info("CrawlDb dump: starting");
     LOG.info("CrawlDb db: " + crawlDb);
@@ -249,7 +251,7 @@
     }
     String param = null;
     String crawlDb = args[0];
-    NutchConf conf = new NutchConf();
+    Configuration conf = NutchConfiguration.create();
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-stats")) {
         dbr.processStatJob(crawlDb, conf);
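
The stat-reading code above exercises the renamed I/O stack end to end:
FileSystem.get(config) replaces NutchFileSystem.get(config), and the Hadoop
SequenceFile readers are a drop-in. A hedged sketch of the read-back loop;
the loop body falls outside the hunk's context lines, so the next()/close()
shape below is an assumption based on the declared key and value types:

    import java.io.File;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.UTF8;
    import org.apache.hadoop.mapred.SequenceFileOutputFormat;

    public class StatReadback {
      public static void readCounts(Configuration config, File tmpFolder)
          throws Exception {
        FileSystem fs = FileSystem.get(config);      // was NutchFileSystem.get
        SequenceFile.Reader[] readers =
            SequenceFileOutputFormat.getReaders(config, tmpFolder);
        UTF8 key = new UTF8();
        LongWritable value = new LongWritable();     // value type per the imports
        for (int i = 0; i < readers.length; i++) {
          while (readers[i].next(key, value)) {      // assumed iteration shape
            System.out.println(key + "\t" + value.get());
          }
          readers[i].close();
        }
        fs.delete(tmpFolder);                        // cleanup, as elsewhere here
      }
    }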

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Fri Feb  3 16:38:32 2006
@@ -19,8 +19,8 @@
 import java.util.Iterator;
 import java.io.IOException;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.mapred.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.mapred.*;
 
 /** Merge new page entries with existing entries. */
 public class CrawlDbReducer implements Reducer {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Fri Feb  3 16:38:32 2006
@@ -22,13 +22,16 @@
 import java.text.*;
 import java.util.logging.*;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.mapred.*;
-import org.apache.nutch.mapred.lib.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.mapred.lib.*;
+
+import org.apache.nutch.util.NutchConfiguration;
 
 /** Generates a subset of a crawl db to fetch. */
-public class Generator extends NutchConfigured {
+public class Generator extends Configured {
 
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.crawl.Generator");
@@ -141,7 +144,7 @@
   }
 
   /** Construct a generator. */
-  public Generator(NutchConf conf) {
+  public Generator(Configuration conf) {
     super(conf);
   }
 
@@ -258,7 +261,7 @@
 
     if (topN != Long.MAX_VALUE)
       LOG.info("topN: " + topN);
-    Generator gen = new Generator(new NutchConf());
+    Generator gen = new Generator(NutchConfiguration.create());
     gen.generate(dbDir, segmentsDir, numFetchers, topN, curTime);
   }
 }
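
Generator is representative of the class-hierarchy change: NutchConfigured
becomes Hadoop's Configured, with the same constructor-injection shape and
an inherited getConf(). A minimal sketch of the pattern; the tool name and
the "example.topn" property key are hypothetical:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.conf.Configured;
    import org.apache.nutch.util.NutchConfiguration;

    public class ExampleTool extends Configured {
      public ExampleTool(Configuration conf) {
        super(conf);                                 // Configured stores the conf
      }
      public void run() {
        // getConf() is inherited, exactly as the tools above use it.
        long topN = getConf().getLong("example.topn", Long.MAX_VALUE);
        System.out.println("topN: " + topN);
      }
      public static void main(String[] args) {
        new ExampleTool(NutchConfiguration.create()).run();
      }
    }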

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Fri Feb  3 16:38:32 2006
@@ -20,15 +20,18 @@
 import java.util.*;
 import java.util.logging.*;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
+
 import org.apache.nutch.net.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.mapred.*;
+import org.apache.nutch.util.NutchConfiguration;
 
 /** This class takes a flat file of URLs and adds them to the database of
  * pages to be crawled.  Useful for bootstrapping the system. */
-public class Injector extends NutchConfigured {
+public class Injector extends Configured {
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.crawl.Injector");
 
@@ -79,7 +82,7 @@
   }
 
   /** Construct an Injector. */
-  public Injector(NutchConf conf) {
+  public Injector(Configuration conf) {
     super(conf);
   }
 
@@ -114,14 +117,14 @@
     CrawlDb.install(mergeJob, crawlDb);
 
     // clean up
-    NutchFileSystem fs = new JobClient(getConf()).getFs();
+    FileSystem fs = new JobClient(getConf()).getFs();
     fs.delete(tempDir);
     LOG.info("Injector: done");
 
   }
 
   public static void main(String[] args) throws Exception {
-    Injector injector = new Injector(new NutchConf());
+    Injector injector = new Injector(NutchConfiguration.create());
     
     if (args.length < 2) {
       System.err.println("Usage: Injector <crawldb> <url_dir>");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlink.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlink.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlink.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlink.java Fri Feb  3 16:38:32 2006
@@ -17,7 +17,7 @@
 package org.apache.nutch.crawl;
 
 import java.io.*;
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 
 /* An incoming link to a page. */
 public class Inlink implements Writable {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlinks.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlinks.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlinks.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Inlinks.java Fri Feb  3 16:38:32 2006
@@ -20,7 +20,7 @@
 import java.net.*;
 import java.util.*;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 
 /** A list of {@link Inlink}s. */
 public class Inlinks implements Writable {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Fri Feb  3 16:38:32 2006
@@ -21,14 +21,17 @@
 import java.util.logging.*;
 import java.net.*;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.mapred.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
+
 import org.apache.nutch.parse.*;
+import org.apache.nutch.util.NutchConfiguration;
 
 /** Maintains an inverted link map, listing incoming links for each url. */
-public class LinkDb extends NutchConfigured implements Mapper, Reducer {
+public class LinkDb extends Configured implements Mapper, Reducer {
 
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.crawl.LinkDb");
@@ -44,7 +47,7 @@
   }
 
   /** Construct a LinkDb. */
-  public LinkDb(NutchConf conf) {
+  public LinkDb(Configuration conf) {
     super(conf);
   }
 
@@ -145,7 +148,7 @@
     LOG.info("LinkDb: done");
   }
 
-  private static JobConf createJob(NutchConf config, File linkDb) {
+  private static JobConf createJob(Configuration config, File linkDb) {
     File newLinkDb =
       new File(linkDb,
                Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
@@ -171,7 +174,7 @@
 
   public static void install(JobConf job, File linkDb) throws IOException {
     File newLinkDb = job.getOutputDir();
-    NutchFileSystem fs = new JobClient(job).getFs();
+    FileSystem fs = new JobClient(job).getFs();
     File old = new File(linkDb, "old");
     File current = new File(linkDb, CURRENT_NAME);
     fs.delete(old);
@@ -181,7 +184,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    LinkDb linkDb = new LinkDb(new NutchConf());
+    LinkDb linkDb = new LinkDb(NutchConfiguration.create());
     
     if (args.length < 2) {
       System.err.println("Usage: <linkdb> <segments>");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java Fri Feb  3 16:38:32 2006
@@ -19,12 +19,14 @@
 import java.io.IOException;
 import java.io.File;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.mapred.*;
-import org.apache.nutch.mapred.lib.HashPartitioner;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.mapred.lib.HashPartitioner;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+
+import org.apache.nutch.util.NutchConfiguration;
 
 import java.util.logging.Logger;
 
@@ -34,15 +36,15 @@
 
   private static final Partitioner PARTITIONER = new HashPartitioner();
 
-  private NutchFileSystem fs;
+  private FileSystem fs;
   private File directory;
   private MapFile.Reader[] readers;
-  private NutchConf nutchConf;
+  private Configuration conf;
 
-  public LinkDbReader(NutchFileSystem fs, File directory, NutchConf nutchConf) {
+  public LinkDbReader(FileSystem fs, File directory, Configuration conf) {
     this.fs = fs;
     this.directory = directory;
-    this.nutchConf = nutchConf;
+    this.conf = conf;
   }
 
   public String[] getAnchors(UTF8 url) throws IOException {
@@ -57,7 +59,7 @@
     synchronized (this) {
       if (readers == null) {
         readers = MapFileOutputFormat.getReaders
-          (fs, new File(directory, LinkDb.CURRENT_NAME), this.nutchConf);
+          (fs, new File(directory, LinkDb.CURRENT_NAME), this.conf);
       }
     }
     
@@ -65,7 +67,7 @@
       (readers, PARTITIONER, url, new Inlinks());
   }
   
-  public static void processDumpJob(String linkdb, String output, NutchConf config) throws IOException {
+  public static void processDumpJob(String linkdb, String output, Configuration config) throws IOException {
     LOG.info("LinkDb dump: starting");
     LOG.info("LinkDb db: " + linkdb);
     File outFolder = new File(output);
@@ -92,11 +94,11 @@
       System.err.println("\t-url <url>\tprint information about <url> to System.out");
       return;
     }
-    NutchConf nutchConf = new NutchConf();
+    Configuration conf = NutchConfiguration.create();
     if (args[1].equals("-dump")) {
-      LinkDbReader.processDumpJob(args[0], args[2], nutchConf);
+      LinkDbReader.processDumpJob(args[0], args[2], conf);
     } else if (args[1].equals("-url")) {
-      LinkDbReader dbr = new LinkDbReader(NutchFileSystem.get(new NutchConf()), new File(args[0]), nutchConf);
+      LinkDbReader dbr = new LinkDbReader(FileSystem.get(NutchConfiguration.create()), new File(args[0]), conf);
       Inlinks links = dbr.getInlinks(new UTF8(args[2]));
       if (links == null) {
         System.out.println(" - no link information.");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MD5Signature.java Fri Feb  3 16:38:32 2006
@@ -16,7 +16,7 @@
 
 package org.apache.nutch.crawl;
 
-import org.apache.nutch.io.MD5Hash;
+import org.apache.hadoop.io.MD5Hash;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.protocol.Content;
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/PartitionUrlByHost.java Fri Feb  3 16:38:32 2006
@@ -19,8 +19,8 @@
 import java.net.URL;
 import java.net.MalformedURLException;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.mapred.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.mapred.*;
 
 /** Partition urls by hostname. */
 public class PartitionUrlByHost implements Partitioner {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Signature.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Signature.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Signature.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Signature.java Fri Feb  3 16:38:32 2006
@@ -18,19 +18,19 @@
 
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.NutchConfigurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configurable;
 
-public abstract class Signature implements NutchConfigurable {
-  protected NutchConf conf;
+public abstract class Signature implements Configurable {
+  protected Configuration conf;
   
   public abstract byte[] calculate(Content content, Parse parse);
 
-  public NutchConf getConf() {
+  public Configuration getConf() {
     return conf;
   }
 
-  public void setConf(NutchConf conf) {
+  public void setConf(Configuration conf) {
     this.conf = conf;
   }
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java Fri Feb  3 16:38:32 2006
@@ -18,13 +18,13 @@
 
 import java.util.logging.Logger;
 
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
 /**
  * Factory class, which instantiates a Signature implementation according to the
- * current NutchConf configuration. This newly created instance is cached in the
- * NutchConf instance, so that it could be later retrieved.
+ * current Configuration. This newly created instance is cached in the
+ * Configuration instance, so that it can be retrieved later.
  * 
  * @author Andrzej Bialecki &lt;ab@getopt.org&gt;
  */
@@ -35,7 +35,7 @@
   private SignatureFactory() {}                   // no public ctor
 
   /** Return the default Signature implementation. */
-  public static Signature getSignature(NutchConf conf) {
+  public static Signature getSignature(Configuration conf) {
     String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
     Signature impl = (Signature)conf.getObject(clazz);
     if (impl == null) {
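
The factory also shows where the per-configuration instance cache went: the
old NutchConf object cache is now Configuration.getObject(). The hunk stops
at the null check, so the instantiate-and-cache branch below is an assumed
completion (setObject() taken as the counterpart of the getObject() call):

    package org.apache.nutch.crawl;

    import org.apache.hadoop.conf.Configuration;

    public class SignatureFactorySketch {
      public static Signature getSignature(Configuration conf) throws Exception {
        String clazz = conf.get("db.signature.class", MD5Signature.class.getName());
        Signature impl = (Signature) conf.getObject(clazz);  // per-conf cache hit
        if (impl == null) {
          // Assumed body of the elided branch: create, configure, cache.
          impl = (Signature) Class.forName(clazz).newInstance();
          impl.setConf(conf);
          conf.setObject(clazz, impl);
        }
        return impl;
      }
    }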

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java Fri Feb  3 16:38:32 2006
@@ -26,12 +26,13 @@
 import java.util.HashMap;
 import java.util.Iterator;
 
-import org.apache.nutch.io.MD5Hash;
+import org.apache.hadoop.io.MD5Hash;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.StringUtil;
+import org.apache.nutch.util.NutchConfiguration;
 
 /**
  * <p>An implementation of a page signature. It calculates an MD5 hash
@@ -157,7 +158,7 @@
   
   public static void main(String[] args) throws Exception {
     TextProfileSignature sig = new TextProfileSignature();
-    sig.setConf(new NutchConf());
+    sig.setConf(NutchConfiguration.create());
     HashMap res = new HashMap();
     File[] files = new File(args[0]).listFiles();
     for (int i = 0; i < files.length; i++) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Fri Feb  3 16:38:32 2006
@@ -19,20 +19,23 @@
 import java.io.IOException;
 import java.io.File;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
+
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.SignatureFactory;
-import org.apache.nutch.fs.*;
 import org.apache.nutch.net.*;
-import org.apache.nutch.util.*;
 import org.apache.nutch.protocol.*;
 import org.apache.nutch.parse.*;
-import org.apache.nutch.mapred.*;
+import org.apache.nutch.util.*;
 
 import java.util.logging.*;
 
 /** The fetcher. Most of the work is done by plugins. */
-public class Fetcher extends NutchConfigured implements MapRunnable { 
+public class Fetcher extends Configured implements MapRunnable { 
 
   public static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.fetcher.Fetcher");
@@ -43,7 +46,7 @@
 
   public static class InputFormat extends SequenceFileInputFormat {
     /** Don't split inputs, to keep things polite. */
-    public FileSplit[] getSplits(NutchFileSystem fs, JobConf job, int nSplits)
+    public FileSplit[] getSplits(FileSystem fs, JobConf job, int nSplits)
       throws IOException {
       File[] files = listFiles(fs, job);
       FileSplit[] splits = new FileSplit[files.length];
@@ -73,18 +76,18 @@
   private boolean parsing;
 
   private class FetcherThread extends Thread {
-    private NutchConf nutchConf;
+    private Configuration conf;
     private URLFilters urlFilters;
     private ParseUtil parseUtil;
     private ProtocolFactory protocolFactory;
 
-    public FetcherThread(NutchConf nutchConf) {
+    public FetcherThread(Configuration conf) {
       this.setDaemon(true);                       // don't hang JVM on exit
       this.setName("FetcherThread");              // use an informative name
-      this.nutchConf = nutchConf;
-      this.urlFilters = new URLFilters(nutchConf);
-      this.parseUtil = new ParseUtil(nutchConf);
-      this.protocolFactory = new ProtocolFactory(nutchConf);
+      this.conf = conf;
+      this.urlFilters = new URLFilters(conf);
+      this.parseUtil = new ParseUtil(conf);
+      this.protocolFactory = new ProtocolFactory(conf);
     }
 
     public void run() {
@@ -205,7 +208,7 @@
 
       if (content == null) {
         String url = key.toString();
-        content = new Content(url, url, new byte[0], "", new ContentProperties(), this.nutchConf);
+        content = new Content(url, url, new byte[0], "", new ContentProperties(), this.conf);
       }
 
       content.getMetadata().setProperty           // add segment to metadata
@@ -252,7 +255,7 @@
 
   public Fetcher() { super(null); }
 
-  public Fetcher(NutchConf conf) { super(conf); }
+  public Fetcher(Configuration conf) { super(conf); }
 
   private synchronized void updateStatus(int bytesInPage) throws IOException {
     pages++;
@@ -283,11 +286,11 @@
     }
   }
 
-  public static boolean isParsing(NutchConf conf) {
+  public static boolean isParsing(Configuration conf) {
     return conf.getBoolean("fetcher.parse", true);
   }
 
-  public static boolean isStoringContent(NutchConf conf) {
+  public static boolean isStoringContent(Configuration conf) {
     return conf.getBoolean("fetcher.store.content", true);
   }
 
@@ -370,7 +373,7 @@
       
     File segment = new File(args[0]);
 
-    NutchConf conf = new NutchConf();
+    Configuration conf = NutchConfiguration.create();
 
     int threads = conf.getInt("fetcher.threads.fetch", 10);
     boolean parsing = true;
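
Fetcher keeps its configuration-driven switches; only the conf type changed.
A small sketch reading the same knobs that main(), isParsing() and
isStoringContent() use above:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.nutch.util.NutchConfiguration;

    public class FetcherFlags {
      public static void main(String[] args) {
        Configuration conf = NutchConfiguration.create();
        int threads = conf.getInt("fetcher.threads.fetch", 10);
        boolean parsing = conf.getBoolean("fetcher.parse", true);
        boolean storing = conf.getBoolean("fetcher.store.content", true);
        System.out.println(threads + " threads, parsing=" + parsing
                           + ", storing=" + storing);
      }
    }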

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java Fri Feb  3 16:38:32 2006
@@ -18,19 +18,21 @@
 
 import java.io.*;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.parse.*;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.NutchConfigurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configurable;
 
 /* An entry in the fetcher's output. */
-public final class FetcherOutput implements Writable, NutchConfigurable {
+public final class FetcherOutput implements Writable, Configurable {
   private CrawlDatum crawlDatum;
   private Content content;
   private ParseImpl parse;
-  private NutchConf nutchConf;
+  private Configuration conf;
+
+  static { WritableName.setName(FetcherOutput.class, "FetcherOutput"); }
 
   public FetcherOutput() {}
 
@@ -44,7 +46,7 @@
   public final void readFields(DataInput in) throws IOException {
     this.crawlDatum = CrawlDatum.read(in);
     this.content = in.readBoolean() ? Content.read(in) : null;
-    this.parse = in.readBoolean() ? ParseImpl.read(in, this.nutchConf) : null;
+    this.parse = in.readBoolean() ? ParseImpl.read(in, this.conf) : null;
   }
 
   public final void write(DataOutput out) throws IOException {
@@ -80,12 +82,12 @@
     return buffer.toString();
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 
 }
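
FetcherOutput is the one spot where the Writable/Configurable pairing
matters at read time: readFields() needs a Configuration to rebuild the
ParseImpl, and the new WritableName.setName() call pins the record's
logical name independent of its package. A reduced sketch of the same
shape, with a hypothetical UTF8 payload standing in for the real fields:

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;
    import org.apache.hadoop.conf.Configurable;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.UTF8;
    import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableName;

    public final class RecordSketch implements Writable, Configurable {
      static { WritableName.setName(RecordSketch.class, "RecordSketch"); }

      private Configuration conf;          // injected before readFields() runs
      private UTF8 payload = new UTF8();   // stand-in for conf-dependent fields

      public void readFields(DataInput in) throws IOException {
        // A conf-dependent field would be read as ParseImpl.read(in, conf).
        payload.readFields(in);
      }
      public void write(DataOutput out) throws IOException {
        payload.write(out);
      }
      public void setConf(Configuration conf) { this.conf = conf; }
      public Configuration getConf() { return this.conf; }
    }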

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutputFormat.java Fri Feb  3 16:38:32 2006
@@ -20,17 +20,17 @@
 import java.io.File;
 
 import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.fs.NutchFileSystem;
+import org.apache.hadoop.fs.FileSystem;
 
-import org.apache.nutch.io.MapFile;
-import org.apache.nutch.io.WritableComparable;
-import org.apache.nutch.io.Writable;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.UTF8;
 
-import org.apache.nutch.mapred.OutputFormat;
-import org.apache.nutch.mapred.RecordWriter;
-import org.apache.nutch.mapred.JobConf;
-import org.apache.nutch.mapred.Reporter;
+import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
 
 import org.apache.nutch.parse.ParseOutputFormat;
 import org.apache.nutch.protocol.Content;
@@ -38,7 +38,7 @@
 /** Splits FetcherOutput entries into multiple map files. */
 public class FetcherOutputFormat implements OutputFormat {
 
-  public RecordWriter getRecordWriter(final NutchFileSystem fs,
+  public RecordWriter getRecordWriter(final FileSystem fs,
                                       final JobConf job,
                                       final String name) throws IOException {
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Fri Feb  3 16:38:32 2006
@@ -20,10 +20,13 @@
 import java.util.*;
 import java.util.logging.*;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.mapred.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.mapred.*;
+
+import org.apache.nutch.util.NutchConfiguration;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.document.Document;
@@ -32,7 +35,7 @@
  * Deletes duplicate documents in a set of Lucene indexes.
  * Duplicates have either the same contents (via MD5 hash) or the same URL.
  ******************************************************************/
-public class DeleteDuplicates extends NutchConfigured
+public class DeleteDuplicates extends Configured
   implements Mapper, Reducer, OutputFormat {
   private static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.indexer.DeleteDuplicates");
@@ -127,7 +130,7 @@
     private static final long INDEX_LENGTH = Integer.MAX_VALUE;
 
     /** Return each index as a split. */
-    public FileSplit[] getSplits(NutchFileSystem fs, JobConf job,
+    public FileSplit[] getSplits(FileSystem fs, JobConf job,
                                  int numSplits)
       throws IOException {
       File[] files = listFiles(fs, job);
@@ -139,7 +142,7 @@
     }
 
     /** Return each index as a split. */
-    public RecordReader getRecordReader(final NutchFileSystem fs,
+    public RecordReader getRecordReader(final FileSystem fs,
                                         final FileSplit split,
                                         final JobConf job,
                                         Reporter reporter) throws IOException {
@@ -148,7 +151,7 @@
       return new RecordReader() {
 
           private IndexReader indexReader =
-            IndexReader.open(new NdfsDirectory(fs, split.getFile(), false, job));
+            IndexReader.open(new FsDirectory(fs, split.getFile(), false, job));
 
           { indexReader.undeleteAll(); }
 
@@ -227,16 +230,17 @@
     }
   }
     
-  private NutchFileSystem fs;
+  private FileSystem fs;
   private int ioFileBufferSize;
 
   public DeleteDuplicates() { super(null); }
 
-  public DeleteDuplicates(NutchConf conf) { super(conf); }
+  public DeleteDuplicates(Configuration conf) { super(conf); }
 
   public void configure(JobConf job) {
+    setConf(job);
     try {
-      fs = NutchFileSystem.get(job);
+      fs = FileSystem.get(job);
       this.ioFileBufferSize = job.getInt("io.file.buffer.size", 4096);
     } catch (IOException e) {
       throw new RuntimeException(e);
@@ -256,7 +260,7 @@
                      OutputCollector output, Reporter reporter)
     throws IOException {
     File index = new File(key.toString());
-    IndexReader reader = IndexReader.open(new NdfsDirectory(fs, index, false, getConf()));
+    IndexReader reader = IndexReader.open(new FsDirectory(fs, index, false, getConf()));
     try {
       while (values.hasNext()) {
         reader.delete(((IntWritable)values.next()).get());
@@ -267,7 +271,7 @@
   }
 
   /** Write nothing. */
-  public RecordWriter getRecordWriter(final NutchFileSystem fs,
+  public RecordWriter getRecordWriter(final FileSystem fs,
                                       final JobConf job,
                                       final String name) throws IOException {
     return new RecordWriter() {                   
@@ -334,7 +338,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    DeleteDuplicates dedup = new DeleteDuplicates(new NutchConf());
+    DeleteDuplicates dedup = new DeleteDuplicates(NutchConfiguration.create());
     
     if (args.length < 1) {
       System.err.println("Usage: <indexes> ...");