You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by cu...@apache.org on 2006/02/04 01:39:32 UTC

svn commit: r374796 [3/5] - in /lucene/nutch/trunk: bin/ conf/ lib/ lib/jetty-ext/ src/java/org/apache/nutch/analysis/ src/java/org/apache/nutch/clustering/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/f...

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java Fri Feb  3 16:38:32 2006
@@ -25,9 +25,12 @@
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseText;
 import org.apache.nutch.crawl.Inlinks;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.ipc.RPC;
+
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.RPC;
+
+import org.apache.nutch.util.NutchConfiguration;
 
 /** Implements the search API over IPC connnections. */
 public class DistributedSearch {
@@ -61,10 +64,10 @@
       int port = Integer.parseInt(args[0]);
       File directory = new File(args[1]);
 
-      NutchConf nutchConf = new NutchConf();
-      NutchBean bean = new NutchBean(nutchConf, directory);
+      Configuration conf = NutchConfiguration.create();
+      NutchBean bean = new NutchBean(conf, directory);
 
-      org.apache.nutch.ipc.Server server = RPC.getServer(bean, port, 10, true, nutchConf);
+      org.apache.hadoop.ipc.Server server = RPC.getServer(bean, port, 10, true, conf);
       server.start();
       server.join();
     }
@@ -81,15 +84,15 @@
     private HashMap segmentToAddress = new HashMap();
     
     private boolean running = true;
-    private NutchConf nutchConf;
+    private Configuration conf;
 
     /** Construct a client talking to servers listed in the named file.
      * Each line in the file lists a server hostname and port, separated by
      * whitespace. 
      */
 
-    public Client(File file, NutchConf nutchConf) throws IOException {
-      this(readConfig(file), nutchConf);
+    public Client(File file, Configuration conf) throws IOException {
+      this(readConfig(file), conf);
     }
 
     private static InetSocketAddress[] readConfig(File config)
@@ -113,12 +116,12 @@
     }
 
     /** Construct a client talking to the named servers. */
-    public Client(InetSocketAddress[] addresses, NutchConf nutchConf) throws IOException {
+    public Client(InetSocketAddress[] addresses, Configuration conf) throws IOException {
       this.defaultAddresses = addresses;
       updateSegments();
       setDaemon(true);
       start();
-      this.nutchConf = nutchConf;
+      this.conf = conf;
     }
     
     private static final Method GET_SEGMENTS;
@@ -155,7 +158,7 @@
       // build segmentToAddress map
       Object[][] params = new Object[defaultAddresses.length][0];
       String[][] results =
-        (String[][])RPC.call(GET_SEGMENTS, params, defaultAddresses, this.nutchConf);
+        (String[][])RPC.call(GET_SEGMENTS, params, defaultAddresses, this.conf);
 
       for (int i = 0; i < results.length; i++) {  // process results of call
         InetSocketAddress addr = defaultAddresses[i];
@@ -199,7 +202,7 @@
         params[i][3] = sortField;
         params[i][4] = Boolean.valueOf(reverse);
       }
-      Hits[] results = (Hits[])RPC.call(SEARCH, params, liveAddresses, this.nutchConf);
+      Hits[] results = (Hits[])RPC.call(SEARCH, params, liveAddresses, this.conf);
 
       TreeSet queue;                              // cull top hits from results
 
@@ -238,13 +241,13 @@
     
     private Protocol getRemote(Hit hit) {
       return (Protocol)
-        RPC.getProxy(Protocol.class, liveAddresses[hit.getIndexNo()], nutchConf);
+        RPC.getProxy(Protocol.class, liveAddresses[hit.getIndexNo()], conf);
     }
 
     private Protocol getRemote(HitDetails hit) {
       InetSocketAddress address =
         (InetSocketAddress)segmentToAddress.get(hit.getValue("segment"));
-      return (Protocol)RPC.getProxy(Protocol.class, address, nutchConf);
+      return (Protocol)RPC.getProxy(Protocol.class, address, conf);
     }
 
     public String getExplanation(Query query, Hit hit) throws IOException {
@@ -262,7 +265,7 @@
         addrs[i] = liveAddresses[hits[i].getIndexNo()];
         params[i][0] = hits[i];
       }
-      return (HitDetails[])RPC.call(DETAILS, params, addrs, nutchConf);
+      return (HitDetails[])RPC.call(DETAILS, params, addrs, conf);
     }
 
 
@@ -281,7 +284,7 @@
         params[i][0] = hit;
         params[i][1] = query;
       }
-      return (String[])RPC.call(SUMMARY, params, addrs, nutchConf);
+      return (String[])RPC.call(SUMMARY, params, addrs, conf);
     }
     
     public byte[] getContent(HitDetails hit) throws IOException {
@@ -316,7 +319,7 @@
         System.exit(-1);
       }
 
-      Query query = Query.parse(args[0], new NutchConf());
+      Query query = Query.parse(args[0], NutchConfiguration.create());
       
       InetSocketAddress[] addresses = new InetSocketAddress[(args.length-1)/2];
       for (int i = 0; i < (args.length-1)/2; i++) {
@@ -324,7 +327,7 @@
           new InetSocketAddress(args[i*2+1], Integer.parseInt(args[i*2+2]));
       }
 
-      Client client = new Client(addresses, new NutchConf());
+      Client client = new Client(addresses, NutchConfiguration.create());
       //client.setTimeout(Integer.MAX_VALUE);
 
       Hits hits = client.search(query, 10, null, null, false);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java Fri Feb  3 16:38:32 2006
@@ -21,13 +21,13 @@
 
 import java.util.HashMap;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
 import org.apache.nutch.protocol.*;
 import org.apache.nutch.parse.*;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.mapred.*;
-import org.apache.nutch.mapred.lib.*;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.mapred.lib.*;
 import org.apache.nutch.crawl.*;
 
 /** Implements {@link HitSummarizer} and {@link HitContent} for a set of
@@ -37,19 +37,19 @@
   private static class Segment {
     private static final Partitioner PARTITIONER = new HashPartitioner();
 
-    private NutchFileSystem nfs;
+    private FileSystem fs;
     private File segmentDir;
 
     private MapFile.Reader[] content;
     private MapFile.Reader[] parseText;
     private MapFile.Reader[] parseData;
     private MapFile.Reader[] crawl;
-    private NutchConf nutchConf;
+    private Configuration conf;
 
-    public Segment(NutchFileSystem nfs, File segmentDir, NutchConf nutchConf) throws IOException {
-      this.nfs = nfs;
+    public Segment(FileSystem fs, File segmentDir, Configuration conf) throws IOException {
+      this.fs = fs;
       this.segmentDir = segmentDir;
-      this.nutchConf = nutchConf;
+      this.conf = conf;
     }
 
     public CrawlDatum getCrawlDatum(UTF8 url) throws IOException {
@@ -85,7 +85,7 @@
     }
     
     private MapFile.Reader[] getReaders(String subDir) throws IOException {
-      return MapFileOutputFormat.getReaders(nfs, new File(segmentDir, subDir), this.nutchConf);
+      return MapFileOutputFormat.getReaders(fs, new File(segmentDir, subDir), this.conf);
     }
 
     private Writable getEntry(MapFile.Reader[] readers, UTF8 url,
@@ -101,20 +101,20 @@
   private Summarizer summarizer;
 
   /** Construct given a directory containing fetcher output. */
-  public FetchedSegments(NutchFileSystem nfs, String segmentsDir, NutchConf nutchConf) throws IOException {
-    File[] segmentDirs = nfs.listFiles(new File(segmentsDir));
-    this.sumContext = nutchConf.getInt("searcher.summary.context", 5);
-    this.sumLength = nutchConf.getInt("searcher.summary.length", 20);
-    this.summarizer = new Summarizer(nutchConf);
+  public FetchedSegments(FileSystem fs, String segmentsDir, Configuration conf) throws IOException {
+    File[] segmentDirs = fs.listFiles(new File(segmentsDir));
+    this.sumContext = conf.getInt("searcher.summary.context", 5);
+    this.sumLength = conf.getInt("searcher.summary.length", 20);
+    this.summarizer = new Summarizer(conf);
 
     if (segmentDirs != null) {
         for (int i = 0; i < segmentDirs.length; i++) {
             File segmentDir = segmentDirs[i];
 //             File indexdone = new File(segmentDir, IndexSegment.DONE_NAME);
-//             if (nfs.exists(indexdone) && nfs.isFile(indexdone)) {
-//             	segments.put(segmentDir.getName(), new Segment(nfs, segmentDir));
+//             if (fs.exists(indexdone) && fs.isFile(indexdone)) {
+//             	segments.put(segmentDir.getName(), new Segment(fs, segmentDir));
 //             }
-            segments.put(segmentDir.getName(), new Segment(nfs, segmentDir, nutchConf));
+            segments.put(segmentDir.getName(), new Segment(fs, segmentDir, conf));
 
         }
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FieldQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FieldQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FieldQueryFilter.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FieldQueryFilter.java Fri Feb  3 16:38:32 2006
@@ -25,14 +25,14 @@
 
 import org.apache.nutch.searcher.Query.Clause;
 import org.apache.nutch.searcher.Query.Phrase;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 /** Translate query fields to search the same-named field, as indexed by an
  * IndexingFilter.  Best for tokenized fields. */
 public abstract class FieldQueryFilter implements QueryFilter {
   private String field;
   private float boost = 1.0f;
-  private NutchConf nutchConf;
+  private Configuration conf;
   private CommonGrams commonGrams;
 
   /** Construct for the named field.*/
@@ -93,12 +93,12 @@
     return output;
   }
   
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
     this.commonGrams = new CommonGrams(conf);
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hit.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hit.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hit.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hit.java Fri Feb  3 16:38:32 2006
@@ -20,11 +20,11 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-import org.apache.nutch.io.Writable;
-import org.apache.nutch.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
 
 import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 /** A document which matched a query in an index. */
 public class Hit implements Writable, Comparable {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/HitDetails.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/HitDetails.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/HitDetails.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/HitDetails.java Fri Feb  3 16:38:32 2006
@@ -21,9 +21,9 @@
 import java.io.IOException;
 import java.util.logging.Logger;
 
-import org.apache.nutch.io.*;
+import org.apache.hadoop.io.*;
 import org.apache.nutch.html.Entities;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 /** Data stored in the index for a hit.
  *

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Hits.java Fri Feb  3 16:38:32 2006
@@ -20,12 +20,12 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
-import org.apache.nutch.io.Writable;
-import org.apache.nutch.io.WritableComparable;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.UTF8;
 
 import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 /** A set of hits matching a query. */
 public final class Hits implements Writable {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java Fri Feb  3 16:38:32 2006
@@ -36,9 +36,9 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
-import org.apache.nutch.fs.*;
-import org.apache.nutch.io.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.conf.*;
 import org.apache.nutch.indexer.*;
 
 /** Implements {@link Searcher} and {@link HitDetailer} for either a single
@@ -48,42 +48,42 @@
   private org.apache.lucene.search.Searcher luceneSearcher;
   private org.apache.lucene.index.IndexReader reader;
   private LuceneQueryOptimizer optimizer;
-  private NutchFileSystem fs;
-  private NutchConf nutchConf;
+  private FileSystem fs;
+  private Configuration conf;
   private QueryFilters queryFilters;
 
   /** Construct given a number of indexes. */
-  public IndexSearcher(File[] indexDirs, NutchConf nutchConf) throws IOException {
+  public IndexSearcher(File[] indexDirs, Configuration conf) throws IOException {
     IndexReader[] readers = new IndexReader[indexDirs.length];
-    this.nutchConf = nutchConf;
-    this.fs = NutchFileSystem.get(nutchConf);
+    this.conf = conf;
+    this.fs = FileSystem.get(conf);
     for (int i = 0; i < indexDirs.length; i++) {
       readers[i] = IndexReader.open(getDirectory(indexDirs[i]));
     }
-    init(new MultiReader(readers), nutchConf);
+    init(new MultiReader(readers), conf);
   }
 
   /** Construct given a single merged index. */
-  public IndexSearcher(File index,  NutchConf nutchConf)
+  public IndexSearcher(File index,  Configuration conf)
     throws IOException {
-    this.nutchConf = nutchConf;
-    this.fs = NutchFileSystem.get(nutchConf);
-    init(IndexReader.open(getDirectory(index)), nutchConf);
+    this.conf = conf;
+    this.fs = FileSystem.get(conf);
+    init(IndexReader.open(getDirectory(index)), conf);
   }
 
-  private void init(IndexReader reader, NutchConf nutchConf) throws IOException {
+  private void init(IndexReader reader, Configuration conf) throws IOException {
     this.reader = reader;
     this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader);
     this.luceneSearcher.setSimilarity(new NutchSimilarity());
-    this.optimizer = new LuceneQueryOptimizer(nutchConf);
-    this.queryFilters = new QueryFilters(nutchConf);
+    this.optimizer = new LuceneQueryOptimizer(conf);
+    this.queryFilters = new QueryFilters(conf);
   }
 
   private Directory getDirectory(File file) throws IOException {
     if ("local".equals(this.fs.getName())) {
       return FSDirectory.getDirectory(file, false);
     } else {
-      return new NdfsDirectory(this.fs, file, false, this.nutchConf);
+      return new FsDirectory(this.fs, file, false, this.conf);
     }
   }
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LinkDbInlinks.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LinkDbInlinks.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LinkDbInlinks.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LinkDbInlinks.java Fri Feb  3 16:38:32 2006
@@ -9,9 +9,9 @@
 
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.crawl.LinkDbReader;
-import org.apache.nutch.fs.NutchFileSystem;
-import org.apache.nutch.io.UTF8;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.conf.Configuration;
 
 import java.io.File;
 
@@ -19,8 +19,8 @@
   
   private LinkDbReader linkdb = null;
   
-  public LinkDbInlinks(NutchFileSystem fs, File dir, NutchConf nutchConf) {
-    linkdb = new LinkDbReader(fs, dir, nutchConf);
+  public LinkDbInlinks(FileSystem fs, File dir, Configuration conf) {
+    linkdb = new LinkDbReader(fs, dir, conf);
   }
 
   public String[] getAnchors(HitDetails details) throws IOException {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java Fri Feb  3 16:38:32 2006
@@ -22,7 +22,7 @@
 import org.apache.lucene.index.Term;
 import org.apache.lucene.misc.ChainedFilter;
 
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 import java.util.LinkedHashMap;
 import java.util.Map;
@@ -72,11 +72,11 @@
    * @param threshold
    *          the fraction of documents which must contain a term
    */
-  public LuceneQueryOptimizer(NutchConf nutchConf) {
-    final int cacheSize = nutchConf.getInt("searcher.filter.cache.size", 16);
-    this.threshold = nutchConf.getFloat("searcher.filter.cache.threshold",
+  public LuceneQueryOptimizer(Configuration conf) {
+    final int cacheSize = conf.getInt("searcher.filter.cache.size", 16);
+    this.threshold = conf.getFloat("searcher.filter.cache.threshold",
         0.05f);
-    this.searcherMaxHits = nutchConf.getInt("searcher.max.hits", -1);
+    this.searcherMaxHits = conf.getInt("searcher.max.hits", -1);
     this.searcherMaxHits = searcherMaxHits;
     this.cache = new LinkedHashMap(cacheSize, 0.75f, true) {
       protected boolean removeEldestEntry(Map.Entry eldest) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java Fri Feb  3 16:38:32 2006
@@ -21,11 +21,13 @@
 import java.util.logging.Logger;
 import javax.servlet.ServletContext;
 
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.indexer.*;
 import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.util.NutchConfiguration;
 
 /** 
  * One stop shopping for search-related functionality.
@@ -55,12 +57,12 @@
    * don't want to use too many of those. */ 
   private static final int MAX_PROHIBITED_TERMS = 20;
   
-  private NutchConf nutchConf;
+  private Configuration conf;
 
-  private NutchFileSystem fs;
+  private FileSystem fs;
 
   /** Cache in servlet context. */
-  public static NutchBean get(ServletContext app, NutchConf conf) throws IOException {
+  public static NutchBean get(ServletContext app, Configuration conf) throws IOException {
     NutchBean bean = (NutchBean)app.getAttribute("nutchBean");
     if (bean == null) {
       LOG.info("creating new bean");
@@ -73,29 +75,29 @@
 
   /**
    * 
-   * @param nutchConf
+   * @param conf
    * @throws IOException
    */
-  public NutchBean(NutchConf nutchConf) throws IOException {
-    this(nutchConf, null);
+  public NutchBean(Configuration conf) throws IOException {
+    this(conf, null);
   }
   
   /**
    *  Construct in a named directory. 
-   * @param nutchConf
+   * @param conf
    * @param dir
    * @throws IOException
    */
-  public NutchBean(NutchConf nutchConf, File dir) throws IOException {
-        this.nutchConf = nutchConf;
-        this.fs = NutchFileSystem.get(this.nutchConf);
+  public NutchBean(Configuration conf, File dir) throws IOException {
+        this.conf = conf;
+        this.fs = FileSystem.get(this.conf);
         if (dir == null) {
-            dir = new File(this.nutchConf.get("searcher.dir", "crawl"));
+            dir = new File(this.conf.get("searcher.dir", "crawl"));
         }
         File servers = new File(dir, "search-servers.txt");
         if (fs.exists(servers)) {
             LOG.info("searching servers in " + servers.getCanonicalPath());
-            init(new DistributedSearch.Client(servers, nutchConf));
+            init(new DistributedSearch.Client(servers, conf));
         } else {
             init(new File(dir, "index"), new File(dir, "indexes"), new File(
                     dir, "segments"), new File(dir, "linkdb"));
@@ -108,7 +110,7 @@
     IndexSearcher indexSearcher;
     if (this.fs.exists(indexDir)) {
       LOG.info("opening merged index in " + indexDir);
-      indexSearcher = new IndexSearcher(indexDir, this.nutchConf);
+      indexSearcher = new IndexSearcher(indexDir, this.conf);
     } else {
       LOG.info("opening indexes in " + indexesDir);
       
@@ -127,11 +129,11 @@
         directories[i]=(File)vDirs.remove(0);
       }
       
-      indexSearcher = new IndexSearcher(directories, this.nutchConf);
+      indexSearcher = new IndexSearcher(directories, this.conf);
     }
 
     LOG.info("opening segments in " + segmentsDir);
-    FetchedSegments segments = new FetchedSegments(this.fs, segmentsDir.toString(),this.nutchConf);
+    FetchedSegments segments = new FetchedSegments(this.fs, segmentsDir.toString(),this.conf);
     
     this.segmentNames = segments.getSegmentNames();
 
@@ -141,7 +143,7 @@
     this.content = segments;
 
     LOG.info("opening linkdb in " + linkDb);
-    this.linkDb = new LinkDbInlinks(fs, linkDb, this.nutchConf);
+    this.linkDb = new LinkDbInlinks(fs, linkDb, this.conf);
   }
 
   private void init(DistributedSearch.Client client) {
@@ -230,7 +232,7 @@
     if (maxHitsPerDup <= 0)                      // disable dup checking
       return search(query, numHits, dedupField, sortField, reverse);
 
-    float rawHitsFactor = this.nutchConf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
+    float rawHitsFactor = this.conf.getFloat("searcher.hostgrouping.rawhits.factor", 2.0f);
     int numHitsRaw = (int)(numHits * rawHitsFactor);
     LOG.info("searching for "+numHitsRaw+" raw hits");
     Hits hits = searcher.search(query, numHitsRaw,
@@ -359,9 +361,9 @@
       System.exit(-1);
     }
 
-    NutchConf nutchConf = new NutchConf();
-    NutchBean bean = new NutchBean(nutchConf);
-    Query query = Query.parse(args[0], nutchConf);
+    Configuration conf = NutchConfiguration.create();
+    NutchBean bean = new NutchBean(conf);
+    Query query = Query.parse(args[0], conf);
     Hits hits = bean.search(query, 10);
     System.out.println("Total hits: " + hits.getTotal());
     int length = (int)Math.min(hits.getTotal(), 10);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java Fri Feb  3 16:38:32 2006
@@ -32,7 +32,7 @@
 
 import javax.xml.parsers.*;
 
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import org.w3c.dom.*;
 import javax.xml.transform.TransformerFactory;
 import javax.xml.transform.Transformer;
@@ -57,12 +57,12 @@
   }
 
   private NutchBean bean;
-  private NutchConf nutchConf;
+  private Configuration conf;
 
-  public void init(ServletConfig config, NutchConf nutchConf) throws ServletException {
+  public void init(ServletConfig config, Configuration conf) throws ServletException {
     try {
-      bean = NutchBean.get(config.getServletContext(), nutchConf);
-      this.nutchConf = nutchConf;
+      bean = NutchBean.get(config.getServletContext(), conf);
+      this.conf = conf;
     } catch (IOException e) {
       throw new ServletException(e);
     }
@@ -118,7 +118,7 @@
         (sort == null ? "" : "&sort=" + sort + (reverse? "&reverse=true": "") +
         (dedupField == null ? "" : "&dedupField=" + dedupField));
 
-    Query query = Query.parse(queryString, this.nutchConf);
+    Query query = Query.parse(queryString, this.conf);
     NutchBean.LOG.info("query: " + queryString);
 
     // execute the query

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Query.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Query.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Query.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Query.java Fri Feb  3 16:38:32 2006
@@ -25,11 +25,11 @@
 import java.util.ArrayList;
 import java.util.logging.Logger;
 
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.analysis.NutchAnalysis;
-
-import org.apache.nutch.io.Writable;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.hadoop.io.Writable;
 
 /** A Nutch query. */
 public final class Query implements Writable, Cloneable {
@@ -50,32 +50,32 @@
     private float weight = 1.0f;
     private Object termOrPhrase;
 
-    private NutchConf nutchConf; 
+    private Configuration conf; 
 
     public Clause(Term term, String field,
-                  boolean isRequired, boolean isProhibited, NutchConf nutchConf) {
-      this(term, isRequired, isProhibited, nutchConf);
+                  boolean isRequired, boolean isProhibited, Configuration conf) {
+      this(term, isRequired, isProhibited, conf);
       this.field = field;
     }
 
-    public Clause(Term term, boolean isRequired, boolean isProhibited, NutchConf nutchConf) {
+    public Clause(Term term, boolean isRequired, boolean isProhibited, Configuration conf) {
       this.isRequired = isRequired;
       this.isProhibited = isProhibited;
       this.termOrPhrase = term;
-      this.nutchConf = nutchConf;
+      this.conf = conf;
     }
 
     public Clause(Phrase phrase, String field,
-                  boolean isRequired, boolean isProhibited, NutchConf nutchConf) {
-      this(phrase, isRequired, isProhibited, nutchConf);
+                  boolean isRequired, boolean isProhibited, Configuration conf) {
+      this(phrase, isRequired, isProhibited, conf);
       this.field = field;
     }
 
-    public Clause(Phrase phrase, boolean isRequired, boolean isProhibited, NutchConf nutchConf) {
+    public Clause(Phrase phrase, boolean isRequired, boolean isProhibited, Configuration conf) {
       this.isRequired = isRequired;
       this.isProhibited = isProhibited;
       this.termOrPhrase = phrase;
-      this.nutchConf = nutchConf;
+      this.conf = conf;
     }
 
     public boolean isRequired() { return isRequired; }
@@ -109,7 +109,7 @@
         getTerm().write(out);
     }
 
-    public static Clause read(DataInput in, NutchConf nutchConf) throws IOException {
+    public static Clause read(DataInput in, Configuration conf) throws IOException {
       byte bits = in.readByte();
       boolean required = ((bits & REQUIRED_BIT) != 0);
       boolean prohibited = ((bits & PROHIBITED_BIT) != 0);
@@ -119,9 +119,9 @@
 
       Clause clause;
       if ((bits & PHRASE_BIT) == 0) {
-        clause = new Clause(Term.read(in), field, required, prohibited, nutchConf);
+        clause = new Clause(Term.read(in), field, required, prohibited, conf);
       } else {
-        clause = new Clause(Phrase.read(in), field, required, prohibited, nutchConf);
+        clause = new Clause(Phrase.read(in), field, required, prohibited, conf);
       }
       clause.weight = weight;
       return clause;
@@ -140,7 +140,7 @@
         buffer.append(":");
       }
 
-      if (!isPhrase() && new QueryFilters(nutchConf).isRawField(field)) {
+      if (!isPhrase() && new QueryFilters(conf).isRawField(field)) {
         buffer.append('"');                        // quote raw terms
         buffer.append(termOrPhrase.toString());
         buffer.append('"');
@@ -279,12 +279,12 @@
 
   private ArrayList clauses = new ArrayList();
 
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   private static final Clause[] CLAUSES_PROTO = new Clause[0];
   
-  public Query(NutchConf nutchConf) {
-      this.nutchConf = nutchConf;
+  public Query(Configuration conf) {
+      this.conf = conf;
   }
 
   /** Return all clauses. */
@@ -299,7 +299,7 @@
 
   /** Add a required term in a specified field. */
   public void addRequiredTerm(String term, String field) {
-    clauses.add(new Clause(new Term(term), field, true, false, this.nutchConf));
+    clauses.add(new Clause(new Term(term), field, true, false, this.conf));
   }
 
   /** Add a prohibited term in the default field. */
@@ -309,7 +309,7 @@
 
   /** Add a prohibited term in the specified field. */
   public void addProhibitedTerm(String term, String field) {
-    clauses.add(new Clause(new Term(term), field, false, true, this.nutchConf));
+    clauses.add(new Clause(new Term(term), field, false, true, this.conf));
   }
 
   /** Add a required phrase in the default field. */
@@ -323,7 +323,7 @@
     } else if (terms.length == 1) {
       addRequiredTerm(terms[0], field);           // optimize to term query
     } else {
-      clauses.add(new Clause(new Phrase(terms), field, true, false, this.nutchConf));
+      clauses.add(new Clause(new Phrase(terms), field, true, false, this.conf));
     }
   }
 
@@ -338,7 +338,7 @@
     } else if (terms.length == 1) {
       addProhibitedTerm(terms[0], field);         // optimize to term query
     } else {
-      clauses.add(new Clause(new Phrase(terms), field, false, true, this.nutchConf));
+      clauses.add(new Clause(new Phrase(terms), field, false, true, this.conf));
     }
   }
 
@@ -348,8 +348,8 @@
       ((Clause)clauses.get(i)).write(out);
   }
   
-  public static Query read(DataInput in, NutchConf nutchConf) throws IOException {
-    Query result = new Query(nutchConf);
+  public static Query read(DataInput in, Configuration conf) throws IOException {
+    Query result = new Query(conf);
     result.readFields(in);
     return result;
   }
@@ -358,7 +358,7 @@
     clauses.clear();
     int length = in.readByte();
     for (int i = 0; i < length; i++)
-      clauses.add(Clause.read(in, this.nutchConf));
+      clauses.add(Clause.read(in, this.conf));
   }
 
   public String toString() {
@@ -415,18 +415,18 @@
 
 
   /** Parse a query from a string. */
-  public static Query parse(String queryString, NutchConf nutchConf) throws IOException {
-    return fixup(NutchAnalysis.parseQuery(queryString, nutchConf), nutchConf);
+  public static Query parse(String queryString, Configuration conf) throws IOException {
+    return fixup(NutchAnalysis.parseQuery(queryString, conf), conf);
   }
 
   /** Convert clauses in unknown fields to the default field. */
-  private static Query fixup(Query input, NutchConf nutchConf) {
+  private static Query fixup(Query input, Configuration conf) {
     // walk the query
-    Query output = new Query(nutchConf);
+    Query output = new Query(conf);
     Clause[] clauses = input.getClauses();
     for (int i = 0; i < clauses.length; i++) {
       Clause c = clauses[i];
-      if (!new QueryFilters(nutchConf).isField(c.getField())) {  // unknown field
+      if (!new QueryFilters(conf).isField(c.getField())) {  // unknown field
         ArrayList terms = new ArrayList();        // add name to query
         if (c.isPhrase()) {                       
           terms.addAll(Arrays.asList(c.getPhrase().getTerms()));
@@ -447,13 +447,13 @@
   /** For debugging. */
   public static void main(String[] args) throws Exception {
     BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
-    NutchConf nutchConf = new NutchConf();
+    Configuration conf = NutchConfiguration.create();
     while (true) {
       System.out.print("Query: ");
       String line = in.readLine();
-      Query query = parse(line, nutchConf);
+      Query query = parse(line, conf);
       System.out.println("Parsed: " + query);
-      System.out.println("Translated: " + new QueryFilters(nutchConf).filter(query));
+      System.out.println("Translated: " + new QueryFilters(conf).filter(query));
     }
   }
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilter.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilter.java Fri Feb  3 16:38:32 2006
@@ -17,13 +17,13 @@
 package org.apache.nutch.searcher;
 
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.nutch.util.NutchConfigurable;
+import org.apache.hadoop.conf.Configurable;
 
 /** Extension point for query translation.  Permits one to add metadata to a
  * query.  All plugins found which implement this extension point are run
  * sequentially on the query.
  */
-public interface QueryFilter extends NutchConfigurable {
+public interface QueryFilter extends Configurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = QueryFilter.class.getName();
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java Fri Feb  3 16:38:32 2006
@@ -18,8 +18,8 @@
 
 import org.apache.nutch.plugin.*;
 import org.apache.nutch.searcher.Query.Clause;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
 import java.util.logging.Logger;
 import java.util.*;
@@ -48,12 +48,12 @@
     return Collections.list(new StringTokenizer(fields, " ,\t\n\r"));
   }
 
-  public QueryFilters(NutchConf nutchConf) {
-    this.queryFilters = (QueryFilter[]) nutchConf.getObject(QueryFilter.class
+  public QueryFilters(Configuration conf) {
+    this.queryFilters = (QueryFilter[]) conf.getObject(QueryFilter.class
         .getName());
     if (this.queryFilters == null) {
       try {
-        ExtensionPoint point = nutchConf.getPluginRepository()
+        ExtensionPoint point = PluginRepository.get(conf)
             .getExtensionPoint(QueryFilter.X_POINT_ID);
         if (point == null)
           throw new RuntimeException(QueryFilter.X_POINT_ID + " not found.");
@@ -73,20 +73,20 @@
           filters[i] = (QueryFilter) extension.getExtensionInstance();
           FIELD_NAMES.addAll(fieldNames);
           FIELD_NAMES.addAll(rawFieldNames);
-          nutchConf.setObject("FIELD_NAMES", FIELD_NAMES);
+          conf.setObject("FIELD_NAMES", FIELD_NAMES);
           RAW_FIELD_NAMES.addAll(rawFieldNames);
-          nutchConf.setObject("RAW_FIELD_NAMES", RAW_FIELD_NAMES);
+          conf.setObject("RAW_FIELD_NAMES", RAW_FIELD_NAMES);
         }
-        nutchConf.setObject(QueryFilter.class.getName(), filters);
+        conf.setObject(QueryFilter.class.getName(), filters);
       } catch (PluginRuntimeException e) {
         throw new RuntimeException(e);
       }
-      this.queryFilters = (QueryFilter[]) nutchConf.getObject(QueryFilter.class
+      this.queryFilters = (QueryFilter[]) conf.getObject(QueryFilter.class
           .getName());
     } else {
       // cache already filled
-      FIELD_NAMES = (HashSet) nutchConf.getObject("FIELD_NAMES");
-      RAW_FIELD_NAMES = (HashSet) nutchConf.getObject("RAW_FIELD_NAMES");
+      FIELD_NAMES = (HashSet) conf.getObject("FIELD_NAMES");
+      RAW_FIELD_NAMES = (HashSet) conf.getObject("RAW_FIELD_NAMES");
     }
   }              
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java Fri Feb  3 16:38:32 2006
@@ -23,23 +23,24 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.searcher.Summary.*;
 import org.apache.nutch.analysis.NutchDocumentAnalyzer;
+import org.apache.nutch.util.NutchConfiguration;
 
 /** Implements hit summarization. */
 public class Summarizer {
    
   /** Converts text to tokens. */
   private Analyzer ANALYZER;
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   /**
    * The constructor.
    * @param conf
    */
-  public Summarizer(NutchConf conf) {
-    this.nutchConf = conf;
+  public Summarizer(Configuration conf) {
+    this.conf = conf;
     this.ANALYZER = new NutchDocumentAnalyzer(conf);
   }
 
@@ -292,7 +293,7 @@
             return;
         }
 
-        Summarizer s = new Summarizer(new NutchConf());
+        Summarizer s = new Summarizer(NutchConfiguration.create());
 
         //
         // Parse the args
@@ -320,11 +321,11 @@
             in.close();
         }
 
-        NutchConf nutchConf = new NutchConf();
-        int sumContext = nutchConf.getInt("searcher.summary.context", 5);
-        int sumLength = nutchConf.getInt("searcher.summary.length", 20);
+        Configuration conf = NutchConfiguration.create();
+        int sumContext = conf.getInt("searcher.summary.context", 5);
+        int sumLength = conf.getInt("searcher.summary.length", 20);
         // Convert the query string into a proper Query
-        Query query = Query.parse(queryBuf.toString(), nutchConf);
+        Query query = Query.parse(queryBuf.toString(), conf);
         System.out.println("Summary: '" + s.getSummary(body.toString(), query, sumContext, sumLength) + "'");
     }
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Fri Feb  3 16:38:32 2006
@@ -28,30 +28,31 @@
 import java.util.logging.Logger;
 
 import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.fs.NutchFileSystem;
-import org.apache.nutch.io.ObjectWritable;
-import org.apache.nutch.io.UTF8;
-import org.apache.nutch.io.Writable;
-import org.apache.nutch.io.WritableComparable;
-import org.apache.nutch.mapred.FileSplit;
-import org.apache.nutch.mapred.JobClient;
-import org.apache.nutch.mapred.JobConf;
-import org.apache.nutch.mapred.OutputCollector;
-import org.apache.nutch.mapred.RecordReader;
-import org.apache.nutch.mapred.RecordWriter;
-import org.apache.nutch.mapred.Reducer;
-import org.apache.nutch.mapred.Reporter;
-import org.apache.nutch.mapred.SequenceFileInputFormat;
-import org.apache.nutch.mapred.SequenceFileRecordReader;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.ObjectWritable;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.SequenceFileRecordReader;
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseText;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.NutchConfigured;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.nutch.util.NutchConfiguration;
 
 /** Dump the content of a segment. */
-public class SegmentReader extends NutchConfigured implements Reducer {
+public class SegmentReader extends Configured implements Reducer {
 
   public static final String DIR_NAME = "segdump";
 
@@ -63,7 +64,7 @@
   /** Wraps inputs in an {@link ObjectWritable}, to permit merging different
    * types in reduce. */
   public static class InputFormat extends SequenceFileInputFormat {
-    public RecordReader getRecordReader(NutchFileSystem fs, FileSplit split,
+    public RecordReader getRecordReader(FileSystem fs, FileSplit split,
                                         JobConf job, Reporter reporter)
       throws IOException {
       reporter.setStatus(split.toString());
@@ -85,8 +86,8 @@
 
   /** Implements a text output format*/
   public static class TextOutputFormat
-  implements org.apache.nutch.mapred.OutputFormat {
-  public RecordWriter getRecordWriter(final NutchFileSystem fs, JobConf job,
+  implements org.apache.hadoop.mapred.OutputFormat {
+  public RecordWriter getRecordWriter(final FileSystem fs, JobConf job,
                                       String name) throws IOException {
 
    final File segmentDumpFile =
@@ -113,7 +114,7 @@
       super(null); 
   }
 
-  public SegmentReader(NutchConf conf) {
+  public SegmentReader(Configuration conf) {
     super(conf);
   }
 
@@ -170,23 +171,23 @@
     JobClient.runJob(job);
     
     // concatenate the output
-    NutchFileSystem nfs = NutchFileSystem.get(job);
+    FileSystem fs = FileSystem.get(job);
     File directory = new File(job.getOutputDir(), SegmentReader.DIR_NAME);
     File dumpFile = new File(directory, job.get("segment.dump.dir", "dump"));
 
     // remove the old file 
-    nfs.delete(dumpFile);
-    File[] files = nfs.listFiles(directory);
+    fs.delete(dumpFile);
+    File[] files = fs.listFiles(directory);
     
     PrintWriter writer = null;
     int currentReccordNumber = 0;
     if (files.length > 0) {
-        writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(nfs.create(dumpFile))));
+        writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(fs.create(dumpFile))));
         try {
             for (int i = 0 ; i < files.length; i++) {
                 File partFile = (File)files[i];
                 try {
-                    currentReccordNumber = append(nfs, job, partFile, writer, currentReccordNumber);
+                    currentReccordNumber = append(fs, job, partFile, writer, currentReccordNumber);
                 } catch (IOException exception) {
                     LOG.warning("Couldn't copy the content of " + partFile.toString() + " into " + dumpFile.toString());
                     LOG.warning(exception.getMessage());
@@ -201,8 +202,8 @@
   }
 
   /** Appends two files and updates the Recno counter*/
-  private int append(NutchFileSystem nfs, NutchConf conf, File src, PrintWriter writer, int currentReccordNumber) throws IOException {
-      BufferedReader reader = new BufferedReader(new InputStreamReader(nfs.open(src)));
+  private int append(FileSystem fs, Configuration conf, File src, PrintWriter writer, int currentReccordNumber) throws IOException {
+      BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(src)));
       try {
           String line = reader.readLine();
           while(line != null) {
@@ -219,8 +220,8 @@
   }
   
   public static void main(String[] args) throws Exception {
-    NutchConf nutchConf = new NutchConf();
-    SegmentReader segmentReader = new SegmentReader(nutchConf);
+    Configuration conf = NutchConfiguration.create();
+    SegmentReader segmentReader = new SegmentReader(conf);
 
     String usage = "Usage: SegmentReader <segment>";
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java Fri Feb  3 16:38:32 2006
@@ -21,7 +21,7 @@
 import org.apache.nutch.searcher.Hit;
 import org.apache.nutch.searcher.HitDetails;
 
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 import javax.servlet.http.HttpServlet;
 import javax.servlet.http.HttpServletRequest;
@@ -44,9 +44,9 @@
 
   NutchBean bean = null;
 
-  public void init(NutchConf nutchConf) {
+  public void init(Configuration conf) {
     try {
-      bean = NutchBean.get(this.getServletContext(), nutchConf);
+      bean = NutchBean.get(this.getServletContext(), conf);
     } catch (IOException e) {
       // nothing
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java Fri Feb  3 16:38:32 2006
@@ -26,10 +26,12 @@
 import org.xml.sax.helpers.*;
 import org.apache.xerces.util.XMLChar;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.fs.*;
-import org.apache.nutch.util.*;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.fs.*;
+import org.apache.hadoop.conf.*;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
 
 /** Utility that converts DMOZ RDF into a flat file of URLs to be injected. */
 public class DmozParser {
@@ -335,8 +337,8 @@
     Pattern topicPattern = null; 
     Vector topics = new Vector(); 
     
-    NutchConf nutchConf = new NutchConf();
-    NutchFileSystem nfs = NutchFileSystem.get(nutchConf);
+    Configuration conf = NutchConfiguration.create();
+    FileSystem fs = FileSystem.get(conf);
     try {
       for (int i = 1; i < argv.length; i++) {
         if ("-includeAdultMaterial".equals(argv[i])) {
@@ -375,7 +377,7 @@
                            includeAdult, skew, topicPattern);
       
     } finally {
-      nfs.close();
+      fs.close();
     }
   }
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java Fri Feb  3 16:38:32 2006
@@ -37,9 +37,10 @@
 import java.util.Vector;
 import java.util.logging.Logger;
 
-import org.apache.nutch.io.UTF8;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
 
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
@@ -459,9 +460,9 @@
     if (qPath != null) {
       is = new FileInputStream(qPath);
     } else {
-        NutchConf nutchConf = new NutchConf();
-        qPath = nutchConf.get("prune.index.tool.queries");
-        is = nutchConf.getConfResourceAsInputStream(qPath);
+        Configuration conf = NutchConfiguration.create();
+        qPath = conf.get("prune.index.tool.queries");
+        is = conf.getConfResourceAsInputStream(qPath);
     }
     if (is == null) {
       LOG.severe("Can't load queries from " + qPath);

Added: lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java?rev=374796&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConfiguration.java Fri Feb  3 16:38:32 2006
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import org.apache.hadoop.conf.Configuration;
+
+/** Utility to create Hadoop {@link Configuration}s that include Nutch-specific
+ * resources.  */
+public class NutchConfiguration {
+
+  /** Create a {@link Configuration} for Nutch. */
+  public static Configuration create() {
+    Configuration conf = new Configuration();
+    addNutchResources(conf);
+    return conf;
+  }
+
+  /** Add the standard Nutch resources to {@link Configuration}. */
+  public static Configuration addNutchResources(Configuration conf) {
+    conf.addDefaultResource("nutch-default.xml");
+    conf.addFinalResource("nutch-site.xml");
+    return conf;
+  }
+}
+

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/util/ThreadPool.java Fri Feb  3 16:38:32 2006
@@ -18,7 +18,7 @@
 
 import java.util.*;
 import java.util.logging.*;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 /************************************************
  * ThreadPool.java                                                 

Modified: lucene/nutch/trunk/src/plugin/build.xml
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/build.xml?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/build.xml (original)
+++ lucene/nutch/trunk/src/plugin/build.xml Fri Feb  3 16:38:32 2006
@@ -44,6 +44,7 @@
   <!-- Test all of the plugins.                               -->
   <!-- ====================================================== -->
   <target name="test">
+    <parallel threadCount="2">
      <ant dir="creativecommons" target="test"/>
      <ant dir="languageidentifier" target="test"/>
      <ant dir="lib-http" target="test"/>
@@ -58,6 +59,7 @@
  <!-- <ant dir="parse-rtf" target="test"/> -->
      <ant dir="parse-swf" target="test"/>
      <ant dir="parse-zip" target="test"/>
+    </parallel>
   </target>
 
   <!-- ====================================================== -->

Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCDeleteUnlicensedTool.java Fri Feb  3 16:38:32 2006
@@ -16,8 +16,8 @@
 
 package org.creativecommons.nutch;
 
-import org.apache.nutch.io.*;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.io.*;
+import org.apache.hadoop.util.LogFormatter;
 import org.apache.nutch.indexer.Indexer;
 
 import org.apache.lucene.index.IndexReader;

Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java Fri Feb  3 16:38:32 2006
@@ -23,14 +23,14 @@
 
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
 
 import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
 import java.util.*;
 import java.net.URL;
@@ -44,7 +44,7 @@
   /** The name of the document field we use. */
   public static String FIELD = "cc";
 
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   public Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException {
@@ -103,12 +103,12 @@
     doc.add(Field.Keyword(FIELD, feature));
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 
 }

Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java Fri Feb  3 16:38:32 2006
@@ -19,7 +19,7 @@
 import org.apache.nutch.parse.*;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 import java.util.*;
 import java.io.*;
@@ -29,7 +29,7 @@
 import org.w3c.dom.*;
 
 import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 /** Adds metadata identifying the Creative Commons license used, if any. */
 public class CCParseFilter implements HtmlParseFilter {
@@ -50,7 +50,7 @@
     }
 
     /** Scan the document adding attributes to metadata.*/
-    public static void walk(Node doc, URL base, ContentProperties metadata, NutchConf nutchConf)
+    public static void walk(Node doc, URL base, ContentProperties metadata, Configuration conf)
       throws ParseException {
 
       // walk the DOM tree, scanning for license data
@@ -69,7 +69,7 @@
       } else if (walker.anchorLicense != null) {  // 3rd: anchor w/ CC license
         licenseLocation = "a";
         licenseUrl = walker.anchorLicense.toString();
-      } else if (nutchConf.getBoolean("creativecommons.exclude.unlicensed", false)) {
+      } else if (conf.getBoolean("creativecommons.exclude.unlicensed", false)) {
         throw new ParseException("No CC license.  Excluding.");
       }
 
@@ -249,7 +249,7 @@
     WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/Image", "image");
   }
 
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   /** Adds metadata or otherwise modifies a parse of an HTML document, given
    * the DOM tree of a page. */
@@ -273,11 +273,11 @@
     return parse;
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 }

Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java Fri Feb  3 16:38:32 2006
@@ -17,24 +17,24 @@
 package org.creativecommons.nutch;
 
 import org.apache.nutch.searcher.RawFieldQueryFilter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 /**
  * Handles "cc:" query clauses, causing them to search the "cc" field indexed by
  * CCIndexingFilter.
  */
 public class CCQueryFilter extends RawFieldQueryFilter {
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   public CCQueryFilter() {
     super(CCIndexingFilter.FIELD);
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 }

Modified: lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java Fri Feb  3 16:38:32 2006
@@ -20,7 +20,8 @@
 import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
 
 import java.util.Properties;
 import java.io.*;
@@ -56,11 +57,11 @@
     }
     in.close();
     byte[] bytes = out.toByteArray();
-    NutchConf nutchConf = new NutchConf();
+    Configuration conf = NutchConfiguration.create();
 
     Content content =
-      new Content(url, url, bytes, contentType, new ContentProperties(), nutchConf);
-    Parse parse = new ParseUtil(nutchConf).parseByParserId("parse-html",content);
+      new Content(url, url, bytes, contentType, new ContentProperties(), conf);
+    Parse parse = new ParseUtil(conf).parseByParserId("parse-html",content);
 
     ContentProperties metadata = parse.getData().getMetadata();
     assertEquals(license, metadata.get("License-Url"));

Modified: lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java Fri Feb  3 16:38:32 2006
@@ -23,7 +23,7 @@
 
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
@@ -32,8 +32,8 @@
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.logging.Logger;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
 /** Adds basic searchable fields to a document. */
 public class BasicIndexingFilter implements IndexingFilter {
@@ -41,7 +41,7 @@
     = LogFormatter.getLogger(BasicIndexingFilter.class.getName());
 
   private int MAX_TITLE_LENGTH;
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   public Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException {
@@ -89,13 +89,13 @@
     return doc;
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
     this.MAX_TITLE_LENGTH = conf.getInt("indexer.max.title.length", 100);
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 
 }

Modified: lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java Fri Feb  3 16:38:32 2006
@@ -33,17 +33,17 @@
 
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
 
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
 
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.mime.MimeType;
 import org.apache.nutch.util.mime.MimeTypes;
 import org.apache.nutch.util.mime.MimeTypeException;
 
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 import java.util.logging.Logger;
 
 import java.text.DateFormat;
@@ -245,7 +245,7 @@
   // Content-Disposition: inline; filename="foo.ppt"
   private PatternMatcher matcher = new Perl5Matcher();
 
-  private NutchConf nutchConf;
+  private Configuration conf;
   static Perl5Pattern patterns[] = {null, null};
   static {
     Perl5Compiler compiler = new Perl5Compiler();
@@ -301,14 +301,14 @@
     return normalized;
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
     MAGIC = conf.getBoolean("mime.type.magic", true);
     MIME = MimeTypes.get(getConf().get("mime.types.file"));
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 
 }

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java Fri Feb  3 16:38:32 2006
@@ -27,8 +27,8 @@
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.HtmlParseFilter;
 import org.apache.nutch.protocol.Content;
-import org.apache.nutch.util.LogFormatter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
 
 // DOM imports
 import org.w3c.dom.DocumentFragment;
@@ -69,7 +69,7 @@
     }
   }
 
-  private NutchConf nutchConf;
+  private Configuration conf;
   
 
   
@@ -198,11 +198,11 @@
     
   }
 
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 }

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java Fri Feb  3 16:38:32 2006
@@ -35,7 +35,7 @@
 // Nutch imports
 import org.apache.nutch.analysis.lang.NGramProfile.NGramEntry;
 import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.ParserFactory;
@@ -46,8 +46,9 @@
 import org.apache.nutch.protocol.ProtocolFactory;
 import org.apache.nutch.protocol.ProtocolNotFound;
 import org.apache.nutch.protocol.ProtocolException;
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.nutch.util.NutchConfiguration;
 
 
 /**
@@ -95,12 +96,12 @@
   /**
    * Constructs a new Language Identifier.
    */
-  public LanguageIdentifier(NutchConf nutchConf) {
+  public LanguageIdentifier(Configuration conf) {
 
     // Gets ngram sizes to take into account from the Nutch Config
-    minLength = nutchConf.getInt("lang.ngram.min.length",
+    minLength = conf.getInt("lang.ngram.min.length",
                                        NGramProfile.DEFAULT_MIN_NGRAM_LENGTH);
-    maxLength = nutchConf.getInt("lang.ngram.max.length",
+    maxLength = conf.getInt("lang.ngram.max.length",
                                        NGramProfile.DEFAULT_MAX_NGRAM_LENGTH);
     // Ensure the min and max values are in an acceptale range
     // (ie min >= DEFAULT_MIN_NGRAM_LENGTH and max <= DEFAULT_MAX_NGRAM_LENGTH)
@@ -110,7 +111,7 @@
     minLength = Math.min(minLength, maxLength);
 
     // Gets the value of the maximum size of data to analyze
-    analyzeLength = nutchConf.getInt("lang.analyze.max.length",
+    analyzeLength = conf.getInt("lang.analyze.max.length",
                                            DEFAULT_ANALYSIS_LENGTH);
     
     Properties p = new Properties();
@@ -258,10 +259,10 @@
 
     }
 
-    NutchConf nutchConf = new NutchConf();
+    Configuration conf = NutchConfiguration.create();
     String lang = null;
     //LanguageIdentifier idfr = LanguageIdentifier.getInstance();
-    LanguageIdentifier idfr = new LanguageIdentifier(nutchConf);
+    LanguageIdentifier idfr = new LanguageIdentifier(conf);
     File f;
     FileInputStream fis;
     try {
@@ -279,7 +280,7 @@
           break;
 
         case IDURL:
-          text = getUrlContent(filename, nutchConf);
+          text = getUrlContent(filename, conf);
           lang = idfr.identify(text);
           break;
 
@@ -335,13 +336,13 @@
    * @param url
    * @return contents of url
    */
-  private static String getUrlContent(String url, NutchConf nutchConf) {
+  private static String getUrlContent(String url, Configuration conf) {
     Protocol protocol;
     try {
-      protocol = new ProtocolFactory(nutchConf).getProtocol(url);
+      protocol = new ProtocolFactory(conf).getProtocol(url);
       Content content = protocol.getProtocolOutput(new UTF8(url), new CrawlDatum()).getContent();
       String contentType = content.getContentType();
-      Parser parser = new ParserFactory(nutchConf).getParser(contentType, url);
+      Parser parser = new ParserFactory(conf).getParser(contentType, url);
       Parse parse = parser.getParse(content);
       System.out.println("text:" + parse.getText());
       return parse.getText();

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java Fri Feb  3 16:38:32 2006
@@ -22,9 +22,9 @@
 import org.apache.nutch.fetcher.FetcherOutput;
 import org.apache.nutch.indexer.IndexingFilter;
 import org.apache.nutch.indexer.IndexingException;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 // Lucene imports
 import org.apache.lucene.document.Field;
@@ -50,7 +50,7 @@
 public class LanguageIndexingFilter implements IndexingFilter {
   
 
-  private NutchConf nutchConf;
+  private Configuration conf;
   private LanguageIdentifier languageIdentifier;
 
 /**
@@ -93,12 +93,12 @@
     return doc;
   }
   
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
     this.languageIdentifier = new LanguageIdentifier(conf);
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 }

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java Fri Feb  3 16:38:32 2006
@@ -17,22 +17,22 @@
 package org.apache.nutch.analysis.lang;
 
 import org.apache.nutch.searcher.RawFieldQueryFilter;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 /** Handles "lang:" query clauses, causing them to search the "lang" field
  * indexed by LanguageIdentifier. */
 public class LanguageQueryFilter extends RawFieldQueryFilter {
-  private NutchConf nutchConf;
+  private Configuration conf;
 
   public LanguageQueryFilter() {
     super("lang");
   }
   
-  public void setConf(NutchConf conf) {
-    this.nutchConf = conf;
+  public void setConf(Configuration conf) {
+    this.conf = conf;
   }
 
-  public NutchConf getConf() {
-    return this.nutchConf;
+  public Configuration getConf() {
+    return this.conf;
   }
 }

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/NGramProfile.java Fri Feb  3 16:38:32 2006
@@ -35,7 +35,7 @@
 import java.util.logging.Logger;
 
 // Nutch imports
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.util.LogFormatter;
 
 // Lucene imports
 import org.apache.lucene.analysis.Token;

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java Fri Feb  3 16:38:32 2006
@@ -26,7 +26,8 @@
 import org.apache.nutch.parse.ParserFactory;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ContentProperties;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
 
 
 public class TestHTMLLanguageParser extends TestCase {
@@ -53,7 +54,7 @@
       for (int t = 0; t < docs.length; t++) {
 
         Content content = getContent(docs[t]);
-        Parser parser = new ParserFactory(new NutchConf()).getParser("text/html", URL);
+        Parser parser = new ParserFactory(NutchConfiguration.create()).getParser("text/html", URL);
         Parse parse = parser.getParse(content);
 
         assertEquals(metalanguages[t], (String) parse.getData().get(
@@ -126,7 +127,7 @@
     ContentProperties p = new ContentProperties();
     p.put("Content-Type", "text/html");
 
-    Content content = new Content(URL, BASE, text.getBytes(), "text/html", p, new NutchConf());
+    Content content = new Content(URL, BASE, text.getBytes(), "text/html", p, NutchConfiguration.create());
     return content;
   }
 

Modified: lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestLanguageIdentifier.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestLanguageIdentifier.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestLanguageIdentifier.java (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestLanguageIdentifier.java Fri Feb  3 16:38:32 2006
@@ -32,8 +32,9 @@
 
 // Lucene imports
 import org.apache.lucene.analysis.Token;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
+import org.apache.nutch.util.NutchConfiguration;
 
 /**
  * JUnit based test of class {@link LanguageIdentifier}.
@@ -205,7 +206,7 @@
     public void testIdentify() {
         try {
             long total = 0;
-            LanguageIdentifier idfr = new LanguageIdentifier(new NutchConf());
+            LanguageIdentifier idfr = new LanguageIdentifier(NutchConfiguration.create());
             BufferedReader in = new BufferedReader(new InputStreamReader(
                         this.getClass().getResourceAsStream("test-referencial.txt")));
             String line = null;

Modified: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java (original)
+++ lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java Fri Feb  3 16:38:32 2006
@@ -26,14 +26,14 @@
 import java.util.logging.Logger;
 
 import org.apache.nutch.crawl.CrawlDatum;
-import org.apache.nutch.io.UTF8;
+import org.apache.hadoop.io.UTF8;
 import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolException;
 import org.apache.nutch.protocol.ProtocolOutput;
 import org.apache.nutch.protocol.ProtocolStatus;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
 
 
 /**
@@ -111,7 +111,7 @@
   private Logger logger = LOGGER;
  
   /** The nutch configuration */
-  private NutchConf conf = null;
+  private Configuration conf = null;
  
 
   /** Creates a new instance of HttpBase */
@@ -128,7 +128,7 @@
   }
   
    // Inherited Javadoc
-    public void setConf(NutchConf conf) {
+    public void setConf(Configuration conf) {
         this.conf = conf;
         this.proxyHost = conf.get("http.proxy.host");
         this.proxyPort = conf.getInt("http.proxy.port", 8080);
@@ -145,7 +145,7 @@
     }
 
   // Inherited Javadoc
-  public NutchConf getConf() {
+  public Configuration getConf() {
     return this.conf;
   }
    

Modified: lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java (original)
+++ lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java Fri Feb  3 16:38:32 2006
@@ -29,9 +29,9 @@
 import java.util.logging.Logger;
 
 // Nutch imports
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.NutchConfigurable;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.util.LogFormatter;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.protocol.ProtocolException;
@@ -46,7 +46,7 @@
  * @author Mike Cafarella
  * @author Doug Cutting
  */
-public class RobotRulesParser implements NutchConfigurable {
+public class RobotRulesParser implements Configurable {
   
   public static final Logger LOG=
     LogFormatter.getLogger(RobotRulesParser.class.getName());
@@ -62,7 +62,7 @@
 
   private static RobotRuleSet FORBID_ALL_RULES = getForbidAllRules();
 
-  private NutchConf conf;
+  private Configuration conf;
   private HashMap robotNames;
 
   /**
@@ -176,16 +176,16 @@
 
   RobotRulesParser() { }
 
-  public RobotRulesParser(NutchConf conf) {
+  public RobotRulesParser(Configuration conf) {
     setConf(conf);
   }
 
 
   /* ---------------------------------- *
-   * <implementation:NutchConfigurable> *
+   * <implementation:Configurable> *
    * ---------------------------------- */
 
-  public void setConf(NutchConf conf) {
+  public void setConf(Configuration conf) {
     this.conf = conf;
     allowForbidden = conf.getBoolean("http.robots.403.allow", false);
     //
@@ -215,12 +215,12 @@
     setRobotNames((String[]) agents.toArray(new String[agents.size()]));
   }
 
-  public NutchConf getConf() {
+  public Configuration getConf() {
     return conf;
   }
 
   /* ---------------------------------- *
-   * <implementation:NutchConfigurable> *
+   * <implementation:Configurable> *
    * ---------------------------------- */
 
   private void setRobotNames(String[] robotNames) {

Modified: lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OntologyImpl.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OntologyImpl.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OntologyImpl.java (original)
+++ lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OntologyImpl.java Fri Feb  3 16:38:32 2006
@@ -16,8 +16,9 @@
 
 package org.apache.nutch.ontology;
 
-import org.apache.nutch.util.NutchConf;
-import org.apache.nutch.util.LogFormatter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.LogFormatter;
+import org.apache.nutch.util.NutchConfiguration;
 
 import com.hp.hpl.jena.ontology.Individual;
 import com.hp.hpl.jena.ontology.OntClass;
@@ -325,10 +326,10 @@
 
   public static void main( String[] args ) throws Exception {
 
-    NutchConf nutchConf = new NutchConf(); 
-    Ontology ontology = new OntologyFactory(nutchConf).getOntology();
+    Configuration conf = NutchConfiguration.create(); 
+    Ontology ontology = new OntologyFactory(conf).getOntology();
 
-    String urls = nutchConf.get("extension.ontology.urls");
+    String urls = conf.get("extension.ontology.urls");
     if (urls==null || urls.trim().equals("")) {
       LOG.severe("No ontology url found.");
       return;

Modified: lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OwlParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OwlParser.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OwlParser.java (original)
+++ lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OwlParser.java Fri Feb  3 16:38:32 2006
@@ -21,7 +21,7 @@
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-//import org.apache.nutch.util.LogFormatter;
+//import org.apache.hadoop.util.LogFormatter;
 
 import com.hp.hpl.jena.ontology.OntClass;
 import com.hp.hpl.jena.ontology.OntModel;

Modified: lucene/nutch/trunk/src/plugin/ontology/src/test/org/apache/nutch/ontology/TestOntology.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/ontology/src/test/org/apache/nutch/ontology/TestOntology.java?rev=374796&r1=374795&r2=374796&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/ontology/src/test/org/apache/nutch/ontology/TestOntology.java (original)
+++ lucene/nutch/trunk/src/plugin/ontology/src/test/org/apache/nutch/ontology/TestOntology.java Fri Feb  3 16:38:32 2006
@@ -25,7 +25,8 @@
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
-import org.apache.nutch.util.NutchConf;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
 
 import junit.framework.TestCase;
 
@@ -51,13 +52,13 @@
   private String[] sampleFiles = {"time.owl"};
 
   private static Ontology ontology;
-  private NutchConf nutchConf;
+  private Configuration conf;
   public TestOntology(String name) { 
     super(name); 
   }
 
   protected void setUp() {
-      this.nutchConf = new NutchConf();
+      this.conf = NutchConfiguration.create();
   }
 
   protected void tearDown() {}
@@ -69,7 +70,7 @@
 
     if (ontology==null) {
       try {
-        ontology = new OntologyFactory(this.nutchConf).getOntology();
+        ontology = new OntologyFactory(this.conf).getOntology();
       } catch (Exception e) {
         throw new Exception("Failed to instantiate ontology");
       }