You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/01/31 17:13:17 UTC
svn commit: r373853 [1/6] - in /lucene/nutch/trunk/src: java/org/apache/nutch/analysis/ java/org/apache/nutch/clustering/ java/org/apache/nutch/crawl/ java/org/apache/nutch/fetcher/ java/org/apache/nutch/fs/ java/org/apache/nutch/indexer/ java/org/apac...

Author: ab
Date: Tue Jan 31 08:08:58 2006
New Revision: 373853

URL: http://svn.apache.org/viewcvs?rev=373853&view=rev
Log:
Apply patches from NUTCH-169 (remove static NutchConf).

Submitted by: Marko Bauhardt, Stefan Groschupf, Jerome Charron.


Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.jj
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/FileUtil.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/LocalFileSystem.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSFileSystem.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSShell.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataInputStream.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataOutputStream.java
    lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/NdfsDirectory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/io/ArrayFile.java
    lucene/nutch/trunk/src/java/org/apache/nutch/io/MapFile.java
    lucene/nutch/trunk/src/java/org/apache/nutch/io/ObjectWritable.java
    lucene/nutch/trunk/src/java/org/apache/nutch/io/SequenceFile.java
    lucene/nutch/trunk/src/java/org/apache/nutch/io/SetFile.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ipc/Client.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ipc/RPC.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ipc/Server.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/CombiningCollector.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/JobClient.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/JobConf.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/JobTracker.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/LocalJobRunner.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/MapFileOutputFormat.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/MapOutputFile.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/MapTask.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/MapTaskRunner.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/ReduceTask.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/ReduceTaskRunner.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/SequenceFileInputFormat.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/SequenceFileOutputFormat.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/SequenceFileRecordReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/Task.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/TaskRunner.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/TaskTracker.java
    lucene/nutch/trunk/src/java/org/apache/nutch/mapred/demo/Grep.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/DataNode.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/FSConstants.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/FSDataset.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/FSNamesystem.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/NDFSClient.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ndfs/NameNode.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/BasicUrlNormalizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/RegexUrlNormalizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilterChecker.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/net/UrlNormalizerFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/ontology/OntologyFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/HtmlParseFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/Outlink.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/OutlinkExtractor.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseImpl.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseOutputFormat.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParsePluginsReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseStatus.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseText.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseUtil.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/Parser.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
    lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
    lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Content.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/Protocol.java
    lucene/nutch/trunk/src/java/org/apache/nutch/protocol/ProtocolFactory.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/DistributedSearch.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FieldQueryFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/IndexSearcher.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LinkDbInlinks.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/NutchBean.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/OpenSearchServlet.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Query.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilter.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/QueryFilters.java
    lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java
    lucene/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
    lucene/nutch/trunk/src/java/org/apache/nutch/servlet/Cached.java
    lucene/nutch/trunk/src/java/org/apache/nutch/tools/DmozParser.java
    lucene/nutch/trunk/src/java/org/apache/nutch/tools/PruneIndexTool.java
    lucene/nutch/trunk/src/java/org/apache/nutch/util/NutchConf.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCIndexingFilter.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCQueryFilter.java
    lucene/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
    lucene/nutch/trunk/src/plugin/index-basic/src/java/org/apache/nutch/indexer/basic/BasicIndexingFilter.java
    lucene/nutch/trunk/src/plugin/index-more/src/java/org/apache/nutch/indexer/more/MoreIndexingFilter.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/HTMLLanguageParser.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIdentifier.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageIndexingFilter.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/java/org/apache/nutch/analysis/lang/LanguageQueryFilter.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
    lucene/nutch/trunk/src/plugin/languageidentifier/src/test/org/apache/nutch/analysis/lang/TestLanguageIdentifier.java
    lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/HttpBase.java
    lucene/nutch/trunk/src/plugin/lib-http/src/java/org/apache/nutch/protocol/http/api/RobotRulesParser.java
    lucene/nutch/trunk/src/plugin/ontology/src/java/org/apache/nutch/ontology/OntologyImpl.java
    lucene/nutch/trunk/src/plugin/ontology/src/test/org/apache/nutch/ontology/TestOntology.java
    lucene/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
    lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMContentUtils.java
    lucene/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
    lucene/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java
    lucene/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
    lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MP3Parser.java
    lucene/nutch/trunk/src/plugin/parse-mp3/src/java/org/apache/nutch/parse/mp3/MetadataCollector.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/java/org/apache/nutch/parse/mspowerpoint/MSPowerPointParser.java
    lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
    lucene/nutch/trunk/src/plugin/parse-msword/src/java/org/apache/nutch/parse/msword/MSWordParser.java
    lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
    lucene/nutch/trunk/src/plugin/parse-pdf/src/java/org/apache/nutch/parse/pdf/PdfParser.java
    lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
    lucene/nutch/trunk/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java
    lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java
    lucene/nutch/trunk/src/plugin/parse-rtf/src/java/org/apache/nutch/parse/rtf/RTFParseFactory.java
    lucene/nutch/trunk/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java
    lucene/nutch/trunk/src/plugin/parse-text/src/java/org/apache/nutch/parse/text/TextParser.java
    lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
    lucene/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java
    lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
    lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/File.java
    lucene/nutch/trunk/src/plugin/protocol-file/src/java/org/apache/nutch/protocol/file/FileResponse.java
    lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
    lucene/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
    lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/Http.java
    lucene/nutch/trunk/src/plugin/protocol-http/src/java/org/apache/nutch/protocol/http/HttpResponse.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpAuthenticationFactory.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpBasicAuthentication.java
    lucene/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/HttpResponse.java
    lucene/nutch/trunk/src/plugin/query-basic/src/java/org/apache/nutch/searcher/basic/BasicQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/DateQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-more/src/java/org/apache/nutch/searcher/more/TypeQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-site/src/java/org/apache/nutch/searcher/site/SiteQueryFilter.java
    lucene/nutch/trunk/src/plugin/query-url/src/java/org/apache/nutch/searcher/url/URLQueryFilter.java
    lucene/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/net/PrefixURLFilter.java
    lucene/nutch/trunk/src/plugin/urlfilter-regex/src/java/org/apache/nutch/net/RegexURLFilter.java
    lucene/nutch/trunk/src/test/org/apache/nutch/analysis/TestQueryParser.java
    lucene/nutch/trunk/src/test/org/apache/nutch/fs/TestNutchFileSystem.java
    lucene/nutch/trunk/src/test/org/apache/nutch/io/TestArrayFile.java
    lucene/nutch/trunk/src/test/org/apache/nutch/io/TestSequenceFile.java
    lucene/nutch/trunk/src/test/org/apache/nutch/io/TestSetFile.java
    lucene/nutch/trunk/src/test/org/apache/nutch/io/TestWritable.java
    lucene/nutch/trunk/src/test/org/apache/nutch/ipc/TestIPC.java
    lucene/nutch/trunk/src/test/org/apache/nutch/ipc/TestRPC.java
    lucene/nutch/trunk/src/test/org/apache/nutch/mapred/TestSequenceFileInputFormat.java
    lucene/nutch/trunk/src/test/org/apache/nutch/mapred/TestTextInputFormat.java
    lucene/nutch/trunk/src/test/org/apache/nutch/ndfs/TestNDFS.java
    lucene/nutch/trunk/src/test/org/apache/nutch/net/TestBasicUrlNormalizer.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
    lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java
    lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
    lucene/nutch/trunk/src/test/org/apache/nutch/protocol/TestContentProperties.java
    lucene/nutch/trunk/src/test/org/apache/nutch/searcher/TestQuery.java
    lucene/nutch/trunk/src/web/jsp/anchors.jsp
    lucene/nutch/trunk/src/web/jsp/cached.jsp
    lucene/nutch/trunk/src/web/jsp/explain.jsp
    lucene/nutch/trunk/src/web/jsp/refine-query-init.jsp
    lucene/nutch/trunk/src/web/jsp/search.jsp
    lucene/nutch/trunk/src/web/jsp/text.jsp

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/AnalyzerFactory.java Tue Jan 31 08:08:58 2006
@@ -26,6 +26,7 @@
 import org.apache.nutch.plugin.PluginRepository;
 import org.apache.nutch.plugin.PluginRuntimeException;
 import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.util.NutchConf;
 
 
 /**
@@ -38,25 +39,21 @@
   public final static Logger LOG =
           LogFormatter.getLogger(AnalyzerFactory.class.getName());
 
-  private final static ExtensionPoint X_POINT = 
-          PluginRepository.getInstance()
-                          .getExtensionPoint(NutchAnalyzer.X_POINT_ID);
 
-  private final static Map CACHE = new HashMap();
-
-  private final static NutchAnalyzer DEFAULT_ANALYZER = 
-                                            new NutchDocumentAnalyzer();
-  
+  private NutchAnalyzer DEFAULT_ANALYZER;
   
-  static {
-    if (X_POINT == null) {
-      throw new RuntimeException("x point " + NutchAnalyzer.X_POINT_ID +
-                                 " not found.");
-    }
-  }
-
+  private ExtensionPoint extensionPoint;
+  private NutchConf nutchConf;
 
-  private AnalyzerFactory() {}
+  public AnalyzerFactory (NutchConf nutchConf) {
+      DEFAULT_ANALYZER = new NutchDocumentAnalyzer(nutchConf);
+      this.nutchConf = nutchConf;
+      this.extensionPoint = nutchConf.getPluginRepository().getExtensionPoint(NutchAnalyzer.X_POINT_ID);
+      if(this.extensionPoint == null) {
+          throw new RuntimeException("x point " + NutchAnalyzer.X_POINT_ID +
+          " not found.");
+      }
+  }
 
   
   /**
@@ -67,7 +64,7 @@
    * plugin found whose "lang" attribute equals the specified lang parameter is
    * used. If none match, then the {@link NutchDocumentAnalyzer} is used.
    */
-  public static NutchAnalyzer get(String lang) {
+  public NutchAnalyzer get(String lang) {
 
     NutchAnalyzer analyzer = DEFAULT_ANALYZER;
     Extension extension = getExtension(lang);
@@ -81,20 +78,20 @@
     return analyzer;
   }
 
-  private static Extension getExtension(String lang) {
+  private Extension getExtension(String lang) {
 
-    Extension extension = (Extension) CACHE.get(lang);
+    Extension extension = (Extension) this.nutchConf.getObject(lang);
     if (extension == null) {
       extension = findExtension(lang);
-      CACHE.put(lang, extension);
+      this.nutchConf.setObject(lang, extension);
     }
     return extension;
   }
 
-  private static Extension findExtension(String lang) {
+  private Extension findExtension(String lang) {
 
     if (lang != null) {
-      Extension[] extensions = X_POINT.getExtentens();
+      Extension[] extensions = this.extensionPoint.getExtensions();
       for (int i=0; i<extensions.length; i++) {
         if (lang.equals(extensions[i].getAttribute("lang"))) {
           return extensions[i];

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java Tue Jan 31 08:08:58 2006
@@ -37,11 +37,15 @@
   private static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.analysis.CommonGrams");
   private static final char SEPARATOR = '-';
-  private static final HashMap COMMON_TERMS = new HashMap();
-
-  static { init(); }
-
-  private CommonGrams() {}                        // no public ctor
+  private HashMap COMMON_TERMS = new HashMap();
+  
+  /**
+   * The constructor.
+   * @param nutchConf
+   */
+  public CommonGrams(NutchConf nutchConf) {
+      init(nutchConf);
+  }
 
   private static class Filter extends TokenFilter {
     private HashSet common;
@@ -130,10 +134,10 @@
   }
 
   /** Construct using the provided config file. */
-  private static void init() {
+  private void init(NutchConf nutchConf) {
     try {
-      Reader reader = NutchConf.get().getConfResourceAsReader
-        (NutchConf.get().get("analysis.common.terms.file"));
+      Reader reader = nutchConf.getConfResourceAsReader
+        (nutchConf.get("analysis.common.terms.file"));
       BufferedReader in = new BufferedReader(reader);
       String line;
       while ((line = in.readLine()) != null) {
@@ -170,7 +174,7 @@
 
   /** Construct a token filter that inserts n-grams for common terms.  For use
    * while indexing documents.  */
-  public static TokenFilter getFilter(TokenStream ts, String field) {
+  public TokenFilter getFilter(TokenStream ts, String field) {
     return new Filter(ts, (HashSet)COMMON_TERMS.get(field));
   }
 
@@ -179,8 +183,10 @@
     private Term[] terms;
     private int index;
 
-    public ArrayTokens(Phrase phrase) { this.terms = phrase.getTerms(); }
-    
+    public ArrayTokens(Phrase phrase) {
+      this.terms = phrase.getTerms();
+    }
+
     public Token next() {
       if (index == terms.length)
         return null;
@@ -190,7 +196,7 @@
   }
 
   /** Optimizes phrase queries to use n-grams when possible. */
-  public static String[] optimizePhrase(Phrase phrase, String field) {
+  public String[] optimizePhrase(Phrase phrase, String field) {
     //LOG.info("Optimizing " + phrase + " for " + field);
     ArrayList result = new ArrayList();
     TokenStream ts = getFilter(new ArrayTokens(phrase), field);
@@ -211,17 +217,10 @@
     if (prev != null)
       result.add(prev.termText());
 
-//     LOG.info("Optimized: ");
-//     for (int i = 0; i < result.size(); i++) {
-//       LOG.info(result.get(i) + " ");
-//     }
-
     return (String[])result.toArray(new String[result.size()]);
-
-
   }
 
-  private static int arity(String gram) {
+  private int arity(String gram) {
     int index = 0;
     int arity = 0;
     while ((index = gram.indexOf(SEPARATOR, index+1)) != -1) {
@@ -237,14 +236,14 @@
       text.append(args[i]);
       text.append(' ');
     }
-    TokenStream ts =
-      new NutchDocumentTokenizer(new StringReader(text.toString()));
-    ts = getFilter(ts, "url");
+    TokenStream ts = new NutchDocumentTokenizer(new StringReader(text.toString()));
+    CommonGrams commonGrams = new CommonGrams(new NutchConf());
+    ts = commonGrams.getFilter(ts, "url");
     Token token;
     while ((token = ts.next()) != null) {
       System.out.println("Token: " + token);
     }
-    String[] optimized = optimizePhrase(new Phrase(args), "url");
+    String[] optimized = commonGrams.optimizePhrase(new Phrase(args), "url");
     System.out.print("Optimized: ");
     for (int i = 0; i < optimized.length; i++) {
       System.out.print(optimized[i] + " ");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.java Tue Jan 31 08:08:58 2006
@@ -4,7 +4,8 @@
 import org.apache.nutch.searcher.Query;
 import org.apache.nutch.searcher.QueryFilters;
 import org.apache.nutch.searcher.Query.Clause;
-
+import org.apache.nutch.searcher.Query.Clause;
+import org.apache.nutch.util.NutchConf;
 import org.apache.lucene.analysis.StopFilter;
 
 import java.io.*;
@@ -24,6 +25,8 @@
   private static final Set STOP_SET = StopFilter.makeStopSet(STOP_WORDS);
 
   private String queryString;
+  private QueryFilters queryFilters;
+
 
   /** True iff word is a stop word.  Stop words are only removed from queries.
    * Every word is indexed.  */
@@ -32,11 +35,12 @@
   }
 
   /** Construct a query parser for the text in a reader. */
-  public static Query parseQuery(String queryString) throws IOException {
+  public static Query parseQuery(String queryString, NutchConf nutchConf) throws IOException {
     NutchAnalysis parser =
       new NutchAnalysis(new FastCharStream(new StringReader(queryString)));
     parser.queryString = queryString;
-    return parser.parse();
+    parser.queryFilters = new QueryFilters(nutchConf);
+    return parser.parse(nutchConf);
   }
 
   /** For debugging. */
@@ -45,13 +49,13 @@
     while (true) {
       System.out.print("Query: ");
       String line = in.readLine();
-      System.out.println(parseQuery(line));
+      System.out.println(parseQuery(line, new NutchConf()));
     }
   }
 
 /** Parse a query. */
-  final public Query parse() throws ParseException {
-  Query query = new Query();
+  final public Query parse(NutchConf nutchConf) throws ParseException {
+  Query query = new Query(nutchConf);
   ArrayList terms;
   Token token;
   String field;
@@ -213,7 +217,7 @@
       jj_consume_token(-1);
       throw new ParseException();
     }
-    if (QueryFilters.isRawField(field)) {
+    if (this.queryFilters.isRawField(field)) {
       result.clear();
       result.add(queryString.substring(start, end));
     }
@@ -259,7 +263,7 @@
       term = term();
                     result.add(term);
     }
-    if (QueryFilters.isRawField(field)) {
+    if (this.queryFilters.isRawField(field)) {
       result.clear();
       result.add(queryString.substring(start, token.endColumn));
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.jj
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.jj?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.jj (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysis.jj Tue Jan 31 08:08:58 2006
@@ -31,7 +31,8 @@
 import org.apache.nutch.searcher.Query;
 import org.apache.nutch.searcher.QueryFilters;
 import org.apache.nutch.searcher.Query.Clause;
-
+import org.apache.nutch.searcher.Query.Clause;
+import org.apache.nutch.util.NutchConf;
 import org.apache.lucene.analysis.StopFilter;
 
 import java.io.*;
@@ -51,6 +52,8 @@
   private static final Set STOP_SET = StopFilter.makeStopSet(STOP_WORDS);
 
   private String queryString;
+  private QueryFilters queryFilters;
+  
 
   /** True iff word is a stop word.  Stop words are only removed from queries.
    * Every word is indexed.  */
@@ -59,11 +62,12 @@
   }
 
   /** Construct a query parser for the text in a reader. */
-  public static Query parseQuery(String queryString) throws IOException {
+  public static Query parseQuery(String queryString, NutchConf nutchConf) throws IOException {
     NutchAnalysis parser =
       new NutchAnalysis(new FastCharStream(new StringReader(queryString)));
     parser.queryString = queryString;
-    return parser.parse();
+    parser.queryFilters = new QueryFilters(nutchConf);
+    return parser.parse(nutchConf);
   }
 
   /** For debugging. */
@@ -72,7 +76,7 @@
     while (true) {
       System.out.print("Query: ");
       String line = in.readLine();
-      System.out.println(parseQuery(line));
+      System.out.println(parseQuery(line, new NutchConf()));
     }
   }
 
@@ -174,9 +178,9 @@
 
 
 /** Parse a query. */
-Query parse() :
+Query parse(NutchConf nutchConf) :
 {
-  Query query = new Query();
+  Query query = new Query(nutchConf);
   ArrayList terms;
   Token token;
   String field;
@@ -245,7 +249,7 @@
   (<QUOTE>|<EOF>)
     
   {
-    if (QueryFilters.isRawField(field)) {
+    if (this.queryFilters.isRawField(field)) {
       result.clear();
       result.add(queryString.substring(start, end));
     }
@@ -272,7 +276,7 @@
     term = term() { result.add(term); })*
 
   {
-    if (QueryFilters.isRawField(field)) {
+    if (this.queryFilters.isRawField(field)) {
       result.clear();
       result.add(queryString.substring(start, token.endColumn));
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchAnalysisTokenManager.java Tue Jan 31 08:08:58 2006
@@ -3,6 +3,8 @@
 import org.apache.nutch.searcher.Query;
 import org.apache.nutch.searcher.QueryFilters;
 import org.apache.nutch.searcher.Query.Clause;
+import org.apache.nutch.searcher.Query.Clause;
+import org.apache.nutch.util.NutchConf;
 import org.apache.lucene.analysis.StopFilter;
 import java.io.*;
 import java.util.*;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/NutchDocumentAnalyzer.java Tue Jan 31 08:08:58 2006
@@ -24,40 +24,60 @@
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
-
+import org.apache.nutch.mapred.JobConf;
+import org.apache.nutch.util.NutchConf;
 
 /**
- * The analyzer used for Nutch documents.
- * Uses the JavaCC-defined lexical analyzer {@link NutchDocumentTokenizer},
- * with no stop list.  This keeps it consistent with query parsing.
+ * The analyzer used for Nutch documents. Uses the JavaCC-defined lexical
+ * analyzer {@link NutchDocumentTokenizer}, with no stop list. This keeps it
+ * consistent with query parsing.
  */
 public class NutchDocumentAnalyzer extends NutchAnalyzer {
 
   /** Analyzer used to index textual content. */
-  private static class ContentAnalyzer extends Analyzer {
-    /** Constructs a {@link NutchDocumentTokenizer}. */
-    public TokenStream tokenStream(String field, Reader reader) {
-      return CommonGrams.getFilter(new NutchDocumentTokenizer(reader), field);
-    }
+  private static Analyzer CONTENT_ANALYZER;
+  // Anchor Analysis
+  // Like content analysis, but leave gap between anchors to inhibit
+  // cross-anchor phrase matching.
+  /**
+   * The number of unused term positions between anchors in the anchor field.
+   */
+  public static final int INTER_ANCHOR_GAP = 4;
+  /** Analyzer used to analyze anchors. */
+  private static Analyzer ANCHOR_ANALYZER;
+  private NutchConf nutchConf;
+
+  /**
+   * @param conf
+   */
+  public NutchDocumentAnalyzer(NutchConf conf) {
+    this.nutchConf = conf;
+    CONTENT_ANALYZER = new ContentAnalyzer(conf);
+    ANCHOR_ANALYZER = new AnchorAnalyzer();
   }
 
   /** Analyzer used to index textual content. */
-  public static final Analyzer CONTENT_ANALYZER = new ContentAnalyzer();
+  private static class ContentAnalyzer extends Analyzer {
+    private CommonGrams commonGrams;
 
-  // Anchor Analysis
-  // Like content analysis, but leave gap between anchors to inhibit
-  // cross-anchor phrase matching.
+    public ContentAnalyzer(NutchConf nutchConf) {
+      this.commonGrams = new CommonGrams(nutchConf);
+    }
 
-  /** The number of unused term positions between anchors in the anchor
-   * field. */
-  public static final int INTER_ANCHOR_GAP = 4;
+    /** Constructs a {@link NutchDocumentTokenizer}. */
+    public TokenStream tokenStream(String field, Reader reader) {
+      return this.commonGrams.getFilter(new NutchDocumentTokenizer(reader),
+          field);
+    }
+  }
 
   private static class AnchorFilter extends TokenFilter {
+    private boolean first = true;
+
     public AnchorFilter(TokenStream input) {
       super(input);
     }
 
-    private boolean first = true;
     public final Token next() throws IOException {
       Token result = input.next();
       if (result == null)
@@ -76,9 +96,6 @@
     }
   }
 
-  /** Analyzer used to analyze anchors. */
-  public static final Analyzer ANCHOR_ANALYZER = new AnchorAnalyzer();
-
   /** Returns a new token stream for text from the named field. */
   public TokenStream tokenStream(String fieldName, Reader reader) {
     Analyzer analyzer;
@@ -89,5 +106,4 @@
 
     return analyzer.tokenStream(fieldName, reader);
   }
-
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/clustering/OnlineClustererFactory.java Tue Jan 31 08:08:58 2006
@@ -30,11 +30,13 @@
 public class OnlineClustererFactory {
   public static final Logger LOG = LogFormatter
     .getLogger(OnlineClustererFactory.class.getName());
+  private ExtensionPoint extensionPoint;
+  private String extensionName;
 
-  private final static ExtensionPoint X_POINT = PluginRepository.getInstance()
-    .getExtensionPoint(OnlineClusterer.X_POINT_ID);
-
-  private OnlineClustererFactory() {}
+  public OnlineClustererFactory(NutchConf nutchConf) {
+      this.extensionPoint = nutchConf.getPluginRepository().getExtensionPoint(OnlineClusterer.X_POINT_ID);
+      this.extensionName = nutchConf.get("extension.clustering.extension-name");
+  }
 
   /**
   * @return Returns the online clustering extension specified
@@ -43,15 +45,14 @@
   * empty (no preference), the first available clustering extension is
   * returned.
   */
-  public static OnlineClusterer getOnlineClusterer()
+  public OnlineClusterer getOnlineClusterer()
     throws PluginRuntimeException {
 
-    if (X_POINT == null) {
+    if (this.extensionPoint == null) {
       // not even an extension point defined.
       return null;
     }
-
-    String extensionName = NutchConf.get().get("extension.clustering.extension-name");
+    
     if (extensionName != null) {
       Extension extension = findExtension(extensionName);
       if (extension != null) {
@@ -63,7 +64,7 @@
       // not found, fallback to the default, if available.
     }
 
-    Extension[] extensions = X_POINT.getExtensions();
+    Extension[] extensions = this.extensionPoint.getExtensions();
     if (extensions.length > 0) {
       LOG.info("Using the first clustering extension found: "
         + extensions[0].getId());
@@ -73,10 +74,10 @@
     }
   }
 
-  private static Extension findExtension(String name)
+  private Extension findExtension(String name)
     throws PluginRuntimeException {
 
-    Extension[] extensions = X_POINT.getExtensions();
+    Extension[] extensions = this.extensionPoint.getExtensions();
 
     for (int i = 0; i < extensions.length; i++) {
       Extension extension = extensions[i];

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Crawl.java Tue Jan 31 08:08:58 2006
@@ -41,9 +41,6 @@
       (new Date(System.currentTimeMillis()));
   }
 
-  static {
-    NutchConf.get().addConfResource("crawl-tool.xml");
-  }
 
   /* Perform complete crawling and indexing given a set of root urls. */
   public static void main(String args[]) throws Exception {
@@ -53,8 +50,9 @@
       return;
     }
 
-    JobConf conf = new JobConf(NutchConf.get());
-    //conf.addConfResource("crawl-tool.xml");
+    NutchConf nutchConf = new NutchConf();
+    nutchConf.addConfResource("crawl-tool.xml");
+    JobConf conf = new JobConf(nutchConf);
 
     File rootUrlDir = null;
     File dir = new File("crawl-" + getDate());
@@ -120,7 +118,7 @@
     // index, dedup & merge
     new Indexer(conf).index(indexes, crawlDb, linkDb, fs.listFiles(segments));
     new DeleteDuplicates(conf).dedup(new File[] { indexes });
-    new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir).merge();
+    new IndexMerger(fs, fs.listFiles(indexes), index, tmpDir, nutchConf).merge();
 
     LOG.info("crawl finished: " + dir);
   }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDb.java Tue Jan 31 08:08:58 2006
@@ -89,7 +89,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    CrawlDb crawlDb = new CrawlDb(NutchConf.get());
+    CrawlDb crawlDb = new CrawlDb(new NutchConf());
     
     if (args.length < 2) {
       System.err.println("Usage: <crawldb> <segment>");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Tue Jan 31 08:08:58 2006
@@ -157,8 +157,8 @@
     JobClient.runJob(job);
 
     // reading the result
-    NutchFileSystem fileSystem = NutchFileSystem.get();
-    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(fileSystem, tmpFolder);
+    NutchFileSystem fileSystem = NutchFileSystem.get(config);
+    SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(config, tmpFolder);
 
     UTF8 key = new UTF8();
     LongWritable value = new LongWritable();
@@ -210,7 +210,7 @@
     NutchFileSystem fs = NutchFileSystem.get(config);
     UTF8 key = new UTF8(url);
     CrawlDatum val = new CrawlDatum();
-    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new File(crawlDb, CrawlDatum.DB_DIR_NAME));
+    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new File(crawlDb, CrawlDatum.DB_DIR_NAME), config);
     Writable res = MapFileOutputFormat.getEntry(readers, new HashPartitioner(), key, val);
     System.out.println("URL: " + url);
     if (res != null) {
@@ -254,7 +254,7 @@
     }
     String param = null;
     String crawlDb = args[0];
-    NutchConf conf = NutchConf.get();
+    NutchConf conf = new NutchConf();
     for (int i = 1; i < args.length; i++) {
       if (args[i].equals("-stats")) {
         dbr.processStatJob(crawlDb, conf);

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue Jan 31 08:08:58 2006
@@ -259,7 +259,7 @@
 
     if (topN != Long.MAX_VALUE)
       LOG.info("topN: " + topN);
-    Generator gen = new Generator(NutchConf.get());
+    Generator gen = new Generator(new NutchConf());
     gen.generate(dbDir, segmentsDir, numFetchers, topN, curTime);
   }
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Tue Jan 31 08:08:58 2006
@@ -35,11 +35,14 @@
 
   /** Normalize and filter injected urls. */
   public static class InjectMapper implements Mapper {
-    private UrlNormalizer urlNormalizer = UrlNormalizerFactory.getNormalizer();
+    private UrlNormalizer urlNormalizer;
     private float interval;
+    private JobConf jobConf;
 
     public void configure(JobConf job) {
+      urlNormalizer = new UrlNormalizerFactory(job).getNormalizer();
       interval = job.getFloat("db.default.fetch.interval", 30f);
+      this.jobConf = job;
     }
 
     public void map(WritableComparable key, Writable val,
@@ -47,9 +50,11 @@
       throws IOException {
       UTF8 value = (UTF8)val;
       String url = value.toString();              // value is line of text
+      // System.out.println("url: " +url);
       try {
         url = urlNormalizer.normalize(url);       // normalize the url
-        url = URLFilters.filter(url);             // filter the url
+        URLFilters filters = new URLFilters(this.jobConf);
+        url = filters.filter(url);             // filter the url
       } catch (Exception e) {
         LOG.warning("Skipping " +url+":"+e);
         url = null;
@@ -116,7 +121,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    Injector injector = new Injector(NutchConf.get());
+    Injector injector = new Injector(new NutchConf());
     
     if (args.length < 2) {
       System.err.println("Usage: Injector <crawldb> <url_dir>");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Tue Jan 31 08:08:58 2006
@@ -182,7 +182,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    LinkDb linkDb = new LinkDb(NutchConf.get());
+    LinkDb linkDb = new LinkDb(new NutchConf());
     
     if (args.length < 2) {
       System.err.println("Usage: <linkdb> <segments>");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java Tue Jan 31 08:08:58 2006
@@ -37,10 +37,12 @@
   private NutchFileSystem fs;
   private File directory;
   private MapFile.Reader[] readers;
+  private NutchConf nutchConf;
 
-  public LinkDbReader(NutchFileSystem fs, File directory) {
+  public LinkDbReader(NutchFileSystem fs, File directory, NutchConf nutchConf) {
     this.fs = fs;
     this.directory = directory;
+    this.nutchConf = nutchConf;
   }
 
   public String[] getAnchors(UTF8 url) throws IOException {
@@ -55,7 +57,7 @@
     synchronized (this) {
       if (readers == null) {
         readers = MapFileOutputFormat.getReaders
-          (fs, new File(directory, LinkDb.CURRENT_NAME));
+          (fs, new File(directory, LinkDb.CURRENT_NAME), this.nutchConf);
       }
     }
     
@@ -90,11 +92,11 @@
       System.err.println("\t-url <url>\tprint information about <url> to System.out");
       return;
     }
-    
+    NutchConf nutchConf = new NutchConf();
     if (args[1].equals("-dump")) {
-      LinkDbReader.processDumpJob(args[0], args[2], NutchConf.get());
+      LinkDbReader.processDumpJob(args[0], args[2], nutchConf);
     } else if (args[1].equals("-url")) {
-      LinkDbReader dbr = new LinkDbReader(NutchFileSystem.get(), new File(args[0]));
+      LinkDbReader dbr = new LinkDbReader(NutchFileSystem.get(new NutchConf()), new File(args[0]), nutchConf);
       Inlinks links = dbr.getInlinks(new UTF8(args[2]));
       if (links == null) {
         System.out.println(" - no link information.");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/TextProfileSignature.java Tue Jan 31 08:08:58 2006
@@ -157,7 +157,7 @@
   
   public static void main(String[] args) throws Exception {
     TextProfileSignature sig = new TextProfileSignature();
-    sig.setConf(NutchConf.get());
+    sig.setConf(new NutchConf());
     HashMap res = new HashMap();
     File[] files = new File(args[0]).listFiles();
     for (int i = 0; i < files.length; i++) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Tue Jan 31 08:08:58 2006
@@ -73,9 +73,18 @@
   private boolean parsing;
 
   private class FetcherThread extends Thread {
-    public FetcherThread() {
+    private NutchConf nutchConf;
+    private URLFilters urlFilters;
+    private ParseUtil parseUtil;
+    private ProtocolFactory protocolFactory;
+
+    public FetcherThread(NutchConf nutchConf) {
       this.setDaemon(true);                       // don't hang JVM on exit
       this.setName("FetcherThread");              // use an informative name
+      this.nutchConf = nutchConf;
+      this.urlFilters = new URLFilters(nutchConf);
+      this.parseUtil = new ParseUtil(nutchConf);
+      this.protocolFactory = new ProtocolFactory(nutchConf);
     }
 
     public void run() {
@@ -112,7 +121,7 @@
             do {
               redirecting = false;
               LOG.fine("redirectCount=" + redirectCount);
-              Protocol protocol = ProtocolFactory.getProtocol(url);
+              Protocol protocol = this.protocolFactory.getProtocol(url);
               ProtocolOutput output = protocol.getProtocolOutput(key, datum);
               ProtocolStatus status = output.getStatus();
               Content content = output.getContent();
@@ -127,7 +136,7 @@
               case ProtocolStatus.MOVED:         // redirect
               case ProtocolStatus.TEMP_MOVED:
                 String newUrl = status.getMessage();
-                newUrl = URLFilters.filter(newUrl);
+                newUrl = this.urlFilters.filter(newUrl);
                 if (newUrl != null && !newUrl.equals(url)) {
                   url = newUrl;
                   redirecting = true;
@@ -196,7 +205,7 @@
 
       if (content == null) {
         String url = key.toString();
-        content = new Content(url, url, new byte[0], "", new ContentProperties());
+        content = new Content(url, url, new byte[0], "", new ContentProperties(), this.nutchConf);
       }
 
       content.getMetadata().setProperty           // add segment to metadata
@@ -208,14 +217,14 @@
       if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
         ParseStatus parseStatus;
         try {
-          parse = ParseUtil.parse(content);
+          parse = this.parseUtil.parse(content);
           parseStatus = parse.getData().getStatus();
         } catch (Exception e) {
           parseStatus = new ParseStatus(e);
         }
         if (!parseStatus.isSuccess()) {
           LOG.warning("Error parsing: " + key + ": " + parseStatus);
-          parse = parseStatus.getEmptyParse();
+          parse = parseStatus.getEmptyParse(getConf());
         }
         // Calculate page signature. For non-parsing fetchers this will
         // be done in ParseSegment
@@ -295,7 +304,7 @@
     LOG.info("Fetcher: threads: " + threadCount);
 
     for (int i = 0; i < threadCount; i++) {       // spawn threads
-      new FetcherThread().start();
+      new FetcherThread(getConf()).start();
     }
 
     // select a timeout that avoids a task timeout
@@ -361,7 +370,7 @@
       
     File segment = new File(args[0]);
 
-    NutchConf conf = NutchConf.get();
+    NutchConf conf = new NutchConf();
 
     int threads = conf.getInt("fetcher.threads.fetch", 10);
     boolean parsing = true;

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherOutput.java Tue Jan 31 08:08:58 2006
@@ -22,12 +22,15 @@
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.parse.*;
+import org.apache.nutch.util.NutchConf;
+import org.apache.nutch.util.NutchConfigurable;
 
 /* An entry in the fetcher's output. */
-public final class FetcherOutput implements Writable {
+public final class FetcherOutput implements Writable, NutchConfigurable {
   private CrawlDatum crawlDatum;
   private Content content;
   private ParseImpl parse;
+  private NutchConf nutchConf;
 
   public FetcherOutput() {}
 
@@ -41,7 +44,7 @@
   public final void readFields(DataInput in) throws IOException {
     this.crawlDatum = CrawlDatum.read(in);
     this.content = in.readBoolean() ? Content.read(in) : null;
-    this.parse = in.readBoolean() ? ParseImpl.read(in) : null;
+    this.parse = in.readBoolean() ? ParseImpl.read(in, this.nutchConf) : null;
   }
 
   public final void write(DataOutput out) throws IOException {
@@ -75,6 +78,14 @@
     StringBuffer buffer = new StringBuffer();
     buffer.append("CrawlDatum: " + crawlDatum+"\n" );
     return buffer.toString();
+  }
+
+  public void setConf(NutchConf conf) {
+    this.nutchConf = conf;
+  }
+
+  public NutchConf getConf() {
+    return this.nutchConf;
   }
 
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/FileUtil.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/FileUtil.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/FileUtil.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/FileUtil.java Tue Jan 31 08:08:58 2006
@@ -28,8 +28,8 @@
      * Delete a directory and all its contents.  If
      * we return false, the directory may be partially-deleted.
      */
-    public static boolean fullyDelete(File dir) throws IOException {
-        return fullyDelete(new LocalFileSystem(), dir);
+    public static boolean fullyDelete(File dir, NutchConf nutchConf) throws IOException {
+        return fullyDelete(new LocalFileSystem(nutchConf), dir);
     }
     public static boolean fullyDelete(NutchFileSystem nfs, File dir) throws IOException {
         // 20041022, xing.
@@ -43,7 +43,7 @@
      * Copy a file's contents to a new location.
      * Returns whether a target file was overwritten
      */
-    public static boolean copyContents(NutchFileSystem nfs, File src, File dst, boolean overwrite) throws IOException {
+    public static boolean copyContents(NutchFileSystem nfs, File src, File dst, boolean overwrite, NutchConf nutchConf) throws IOException {
         if (nfs.exists(dst) && !overwrite) {
             return false;
         }
@@ -57,7 +57,7 @@
             NFSInputStream in = nfs.openRaw(src);
             try {
                 NFSOutputStream out = nfs.createRaw(dst, true);
-                byte buf[] = new byte[NutchConf.get().getInt("io.file.buffer.size", 4096)];
+                byte buf[] = new byte[nutchConf.getInt("io.file.buffer.size", 4096)];
                 try {
                     int readBytes = in.read(buf);
 
@@ -77,7 +77,7 @@
             if (contents != null) {
                 for (int i = 0; i < contents.length; i++) {
                     File newDst = new File(dst, contents[i].getName());
-                    if (! copyContents(nfs, contents[i], newDst, overwrite)) {
+                    if (! copyContents(nfs, contents[i], newDst, overwrite, nutchConf)) {
                         return false;
                     }
                 }
@@ -90,7 +90,7 @@
      * Copy a file and/or directory and all its contents (whether
      * data or other files/dirs)
      */
-    public static void recursiveCopy(NutchFileSystem nfs, File src, File dst) throws IOException {
+    public static void recursiveCopy(NutchFileSystem nfs, File src, File dst, NutchConf nutchConf) throws IOException {
         //
         // Resolve the real target.
         //
@@ -107,7 +107,7 @@
             //
             // If the source is a file, then just copy the contents
             //
-            copyContents(nfs, src, dst, true);
+            copyContents(nfs, src, dst, true, nutchConf);
         } else {
             //
             // If the source is a dir, then we need to copy all the subfiles.
@@ -115,7 +115,7 @@
             nfs.mkdirs(dst);
             File contents[] = nfs.listFiles(src);
             for (int i = 0; i < contents.length; i++) {
-                recursiveCopy(nfs, contents[i], new File(dst, contents[i].getName()));
+                recursiveCopy(nfs, contents[i], new File(dst, contents[i].getName()), nutchConf);
             }
         }
     }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/LocalFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/LocalFileSystem.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/LocalFileSystem.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/LocalFileSystem.java Tue Jan 31 08:08:58 2006
@@ -23,6 +23,7 @@
 import org.apache.nutch.ndfs.NDFSFile;
 import org.apache.nutch.ndfs.DF;
 import org.apache.nutch.ndfs.NDFSFileInfo;
+import org.apache.nutch.util.NutchConf;
 import org.apache.nutch.io.UTF8;
 
 /****************************************************************
@@ -38,11 +39,11 @@
     TreeMap lockObjSet = new TreeMap();
     // by default use copy/delete instead of rename
     boolean useCopyForRename = true;
-
+    
     /**
      */
-    public LocalFileSystem() throws IOException {
-        super();
+    public LocalFileSystem(NutchConf nutchConf) throws IOException {
+        super(nutchConf);
         // if you find an OS which reliably supports non-POSIX
         // rename(2) across filesystems / volumes, you can
         // uncomment this.
@@ -175,7 +176,7 @@
      */
     public boolean renameRaw(File src, File dst) throws IOException {
         if (useCopyForRename) {
-            FileUtil.copyContents(this, src, dst, true);
+            FileUtil.copyContents(this, src, dst, true, nutchConf);
             return fullyDelete(src);
         } else return src.renameTo(dst);
     }
@@ -288,7 +289,7 @@
     public void moveFromLocalFile(File src, File dst) throws IOException {
         if (! src.equals(dst)) {
             if (useCopyForRename) {
-                FileUtil.copyContents(this, src, dst, true);
+                FileUtil.copyContents(this, src, dst, true, this.nutchConf);
                 fullyDelete(src);
             } else src.renameTo(dst);
         }
@@ -299,7 +300,7 @@
      */
     public void copyFromLocalFile(File src, File dst) throws IOException {
         if (! src.equals(dst)) {
-            FileUtil.copyContents(this, src, dst, true);
+            FileUtil.copyContents(this, src, dst, true, this.nutchConf);
         }
     }
 
@@ -308,7 +309,7 @@
      */
     public void copyToLocalFile(File src, File dst) throws IOException {
         if (! src.equals(dst)) {
-            FileUtil.copyContents(this, src, dst, true);
+            FileUtil.copyContents(this, src, dst, true, this.nutchConf);
         }
     }
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSFileSystem.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSFileSystem.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSFileSystem.java Tue Jan 31 08:08:58 2006
@@ -43,8 +43,9 @@
      * Create the ShareSet automatically, and then go on to
      * the regular constructor.
      */
-    public NDFSFileSystem(InetSocketAddress namenode) throws IOException {
-      this.ndfs = new NDFSClient(namenode);
+    public NDFSFileSystem(InetSocketAddress namenode, NutchConf nutchConf) throws IOException {
+      super(nutchConf);
+      this.ndfs = new NDFSClient(namenode, nutchConf);
       this.name = namenode.getHostName() + ":" + namenode.getPort();
     }
 
@@ -172,7 +173,7 @@
                 doFromLocalFile(contents[i], new File(dst, contents[i].getName()), deleteSource);
             }
         } else {
-            byte buf[] = new byte[NutchConf.get().getInt("io.file.buffer.size", 4096)];
+            byte buf[] = new byte[this.nutchConf.getInt("io.file.buffer.size", 4096)];
             InputStream in = new BufferedInputStream(new FileInputStream(src));
             try {
                 OutputStream out = create(dst);
@@ -217,10 +218,10 @@
                 copyToLocalFile(contents[i], new File(dst, contents[i].getName()));
             }
         } else {
-            byte buf[] = new byte[NutchConf.get().getInt("io.file.buffer.size", 4096)];
+            byte buf[] = new byte[this.nutchConf.getInt("io.file.buffer.size", 4096)];
             InputStream in = open(src);
             try {
-                OutputStream out = NutchFileSystem.getNamed("local").create(dst);
+                OutputStream out = NutchFileSystem.getNamed("local", this.nutchConf).create(dst);
                 try {
                     int bytesRead = in.read(buf);
                     while (bytesRead >= 0) {
@@ -267,7 +268,7 @@
      */
     public void completeLocalInput(File localFile) throws IOException {
         // Get rid of the local copy - we don't need it anymore.
-        FileUtil.fullyDelete(localFile);
+        FileUtil.fullyDelete(localFile, this.nutchConf);
     }
 
     /**

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSShell.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSShell.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSShell.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NDFSShell.java Tue Jan 31 08:08:58 2006
@@ -122,8 +122,8 @@
     /**
      * Copy an NDFS file
      */
-    public void copy(String srcf, String dstf) throws IOException {
-        if (FileUtil.copyContents(nfs, new File(srcf), new File(dstf), true)) {
+    public void copy(String srcf, String dstf, NutchConf nutchConf) throws IOException {
+        if (FileUtil.copyContents(nfs, new File(srcf), new File(dstf), true, nutchConf)) {
             System.out.println("Copied " + srcf + " to " + dstf);
         } else {
             System.out.println("Copy failed");
@@ -224,8 +224,9 @@
             return;
         }
 
+        NutchConf nutchConf = new NutchConf();
         int i = 0;
-        NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, i);
+        NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, i, nutchConf);
         try {
             NDFSShell tc = new NDFSShell(nfs);
 
@@ -244,7 +245,7 @@
             } else if ("-mv".equals(cmd)) {
                 tc.rename(argv[i++], argv[i++]);
             } else if ("-cp".equals(cmd)) {
-                tc.copy(argv[i++], argv[i++]);
+                tc.copy(argv[i++], argv[i++], nutchConf);
             } else if ("-rm".equals(cmd)) {
                 tc.delete(argv[i++]);
             } else if ("-du".equals(cmd)) {

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataInputStream.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataInputStream.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataInputStream.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataInputStream.java Tue Jan 31 08:08:58 2006
@@ -40,7 +40,7 @@
     private Checksum sum = new CRC32();
     private int inSum;
 
-    public Checker(NutchFileSystem fs, File file)
+    public Checker(NutchFileSystem fs, File file, NutchConf nutchConf)
       throws IOException {
       super(fs.openRaw(file));
       
@@ -48,7 +48,7 @@
       this.file = file;
       File sumFile = fs.getChecksumFile(file);
       try {
-        this.sums = new NFSDataInputStream(fs.openRaw(sumFile));
+        this.sums = new NFSDataInputStream(fs.openRaw(sumFile), nutchConf);
         byte[] version = new byte[VERSION.length];
         sums.readFully(version);
         if (!Arrays.equals(version, VERSION))
@@ -210,20 +210,25 @@
     }
 
 }
-
-  public NFSDataInputStream(NutchFileSystem fs, File file) throws IOException {
-    this(fs, file, NutchConf.get().getInt("io.file.buffer.size", 4096));
+  
+  
+  public NFSDataInputStream(NutchFileSystem fs, File file, int bufferSize, NutchConf nutchConf)
+      throws IOException {
+    super(null);
+    this.in = new Buffer(new PositionCache(new Checker(fs, file, nutchConf)), bufferSize);
   }
-
-  public NFSDataInputStream(NutchFileSystem fs, File file, int bufferSize)
+  
+  
+  public NFSDataInputStream(NutchFileSystem fs, File file, NutchConf nutchConf)
     throws IOException {
     super(null);
-    this.in = new Buffer(new PositionCache(new Checker(fs, file)), bufferSize);
+    int bufferSize = nutchConf.getInt("io.file.buffer.size", 4096);
+    this.in = new Buffer(new PositionCache(new Checker(fs, file, nutchConf)), bufferSize);
   }
     
   /** Construct without checksums. */
-  public NFSDataInputStream(NFSInputStream in) throws IOException {
-    this(in, NutchConf.get().getInt("io.file.buffer.size", 4096));
+  public NFSDataInputStream(NFSInputStream in, NutchConf nutchConf) throws IOException {
+    this(in, nutchConf.getInt("io.file.buffer.size", 4096));
   }
   /** Construct without checksums. */
   public NFSDataInputStream(NFSInputStream in, int bufferSize)

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataOutputStream.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataOutputStream.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataOutputStream.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NFSDataOutputStream.java Tue Jan 31 08:08:58 2006
@@ -29,29 +29,27 @@
   /** Store checksums for data. */
   private static class Summer extends FilterOutputStream {
 
-    private final int bytesPerSum
-      = NutchConf.get().getInt("io.bytes.per.checksum", 512);
-
     private NFSDataOutputStream sums;
     private Checksum sum = new CRC32();
     private int inSum;
+    private int bytesPerSum;
 
-    public Summer(NutchFileSystem fs, File file, boolean overwrite)
+    public Summer(NutchFileSystem fs, File file, boolean overwrite, NutchConf nutchConf)
       throws IOException {
       super(fs.createRaw(file, overwrite));
-
+      this.bytesPerSum = nutchConf.getInt("io.bytes.per.checksum", 512);
       this.sums =
-        new NFSDataOutputStream(fs.createRaw(fs.getChecksumFile(file), true));
+        new NFSDataOutputStream(fs.createRaw(fs.getChecksumFile(file), true), nutchConf);
 
       sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
-      sums.writeInt(bytesPerSum);
+      sums.writeInt(this.bytesPerSum);
     }
 
     public void write(byte b[], int off, int len) throws IOException {
       int summed = 0;
       while (summed < len) {
 
-        int goal = bytesPerSum - inSum;
+        int goal = this.bytesPerSum - inSum;
         int inBuf = len - summed;
         int toSum = inBuf <= goal ? inBuf : goal;
 
@@ -59,7 +57,7 @@
         summed += toSum;
 
         inSum += toSum;
-        if (inSum == bytesPerSum) {
+        if (inSum == this.bytesPerSum) {
           writeSum();
         }
       }
@@ -124,15 +122,15 @@
   }
 
   public NFSDataOutputStream(NutchFileSystem fs, File file,
-                             boolean overwrite, int bufferSize)
+                             boolean overwrite, NutchConf nutchConf)
     throws IOException {
-    super(new Buffer(new PositionCache(new Summer(fs, file, overwrite)),
-                     bufferSize));
+    super(new Buffer(new PositionCache(new Summer(fs, file, overwrite, nutchConf)),
+            nutchConf.getInt("io.file.buffer.size", 4096)));
   }
 
   /** Construct without checksums. */
-  public NFSDataOutputStream(NFSOutputStream out) throws IOException {
-    this(out, NutchConf.get().getInt("io.file.buffer.size", 4096));
+  public NFSDataOutputStream(NFSOutputStream out, NutchConf nutchConf) throws IOException {
+    this(out, nutchConf.getInt("io.file.buffer.size", 4096));
   }
 
   /** Construct without checksums. */

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/fs/NutchFileSystem.java Tue Jan 31 08:08:58 2006
@@ -45,7 +45,6 @@
     public static final Logger LOG = LogFormatter.getLogger("org.apache.nutch.util.NutchFileSystem");
 
     private static final HashMap NAME_TO_FS = new HashMap();
-  
     /**
      * Parse the cmd-line args, starting at i.  Remove consumed args
      * from array.  We expect param in the form:
@@ -53,7 +52,7 @@
      *
      * @deprecated use fs.default.name config option instead
      */
-    public static NutchFileSystem parseArgs(String argv[], int i) throws IOException {
+    public static NutchFileSystem parseArgs(String argv[], int i, NutchConf nutchConf) throws IOException {
         /**
         if (argv.length - i < 1) {
             throw new IOException("Must indicate filesystem type for NDFS");
@@ -65,12 +64,12 @@
         if ("-ndfs".equals(cmd)) {
             i++;
             InetSocketAddress addr = DataNode.createSocketAddr(argv[i++]);
-            nfs = new NDFSFileSystem(addr);
+            nfs = new NDFSFileSystem(addr, nutchConf);
         } else if ("-local".equals(cmd)) {
             i++;
-            nfs = new LocalFileSystem();
+            nfs = new LocalFileSystem(nutchConf);
         } else {
-            nfs = get();                          // using default
+            nfs = get(nutchConf);                          // using default
             LOG.info("No FS indicated, using default:"+nfs.getName());
 
         }
@@ -81,30 +80,26 @@
         return nfs;
     }
 
-
-    /** Returns the default filesystem implementation.*/
-    public static NutchFileSystem get() throws IOException {
-      return get(NutchConf.get());
-    }
-
     /** Returns the configured filesystem implementation.*/
     public static NutchFileSystem get(NutchConf conf) throws IOException {
-      return getNamed(conf.get("fs.default.name", "local"));
+      return getNamed(conf.get("fs.default.name", "local"), conf);
     }
 
+    protected NutchConf nutchConf;
     /** Returns a name for this filesystem, suitable to pass to {@link
      * NutchFileSystem#getNamed(String).*/
     public abstract String getName();
   
     /** Returns a named filesystem.  Names are either the string "local" or a
      * host:port pair, naming an NDFS name server.*/
-    public static NutchFileSystem getNamed(String name) throws IOException {
+    public static NutchFileSystem getNamed(String name, NutchConf nutchConf) throws IOException {
       NutchFileSystem fs = (NutchFileSystem)NAME_TO_FS.get(name);
+      int ioFileBufferSize = nutchConf.getInt("io.file.buffer.size", 4096);
       if (fs == null) {
         if ("local".equals(name)) {
-          fs = new LocalFileSystem();
+          fs = new LocalFileSystem(nutchConf);
         } else {
-          fs = new NDFSFileSystem(DataNode.createSocketAddr(name));
+          fs = new NDFSFileSystem(DataNode.createSocketAddr(name), nutchConf);
         }
         NAME_TO_FS.put(name, fs);
       }
@@ -127,7 +122,8 @@
     ///////////////////////////////////////////////////////////////
     /**
      */
-    public NutchFileSystem() {
+    public NutchFileSystem(NutchConf nutchConf) {
+        this.nutchConf = nutchConf;
     }
 
     /**
@@ -143,12 +139,16 @@
     public abstract String[][] getFileCacheHints(File f, long start, long len) throws IOException;
 
     /**
-     * Opens an NFSDataInputStream for the indicated File.
+     * Opens an NFSDataInputStream at the indicated File.
+     * @param f the file name to open
+     * @param bufferSize the size of the buffer to be used.
+     * @return a new NFSDataInputStream over <code>f</code>, built with the
+     *   given buffer size and this file system's NutchConf.
      */
-    public NFSDataInputStream open(File f) throws IOException {
-      return open(f, NutchConf.get().getInt("io.file.buffer.size", 4096));
+    public NFSDataInputStream open(File f, int bufferSize) throws IOException {
+      return new NFSDataInputStream(this, f, bufferSize, this.nutchConf);
     }
-
+    
     /**
      * Opens an NFSDataInputStream at the indicated File.
      * @param f the file name to open
@@ -156,8 +156,8 @@
      *   the file will be overwritten, and if false an error will be thrown.
      * @param bufferSize the size of the buffer to be used.
      */
-    public NFSDataInputStream open(File f, int bufferSize) throws IOException {
-      return new NFSDataInputStream(this, f, bufferSize);
+    public NFSDataInputStream open(File f) throws IOException {
+      return new NFSDataInputStream(this, f, nutchConf);
     }
 
     /**
@@ -171,8 +171,7 @@
      * Files are overwritten by default.
      */
     public NFSDataOutputStream create(File f) throws IOException {
-      return create(f, true,
-                    NutchConf.get().getInt("io.file.buffer.size", 4096));
+      return create(f, true,this.nutchConf.getInt("io.file.buffer.size", 4096));
     }
 
     /**
@@ -184,7 +183,7 @@
      */
     public NFSDataOutputStream create(File f, boolean overwrite,
                                       int bufferSize) throws IOException {
-      return new NFSDataOutputStream(this, f, overwrite, bufferSize);
+      return new NFSDataOutputStream(this, f, overwrite, this.nutchConf); // NOTE(review): bufferSize is not forwarded here; the stream reads io.file.buffer.size from nutchConf instead
     }
 
     /** Opens an OutputStream at the indicated File.

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Tue Jan 31 08:08:58 2006
@@ -148,7 +148,7 @@
       return new RecordReader() {
 
           private IndexReader indexReader =
-            IndexReader.open(new NdfsDirectory(fs, split.getFile(), false));
+            IndexReader.open(new NdfsDirectory(fs, split.getFile(), false, job));
 
           { indexReader.undeleteAll(); }
 
@@ -228,6 +228,7 @@
   }
     
   private NutchFileSystem fs;
+  private int ioFileBufferSize;
 
   public DeleteDuplicates() { super(null); }
 
@@ -236,6 +237,7 @@
   public void configure(JobConf job) {
     try {
       fs = NutchFileSystem.get(job);
+      this.ioFileBufferSize = job.getInt("io.file.buffer.size", 4096);
     } catch (IOException e) {
       throw new RuntimeException(e);
     }
@@ -254,7 +256,7 @@
                      OutputCollector output, Reporter reporter)
     throws IOException {
     File index = new File(key.toString());
-    IndexReader reader = IndexReader.open(new NdfsDirectory(fs, index, false));
+    IndexReader reader = IndexReader.open(new NdfsDirectory(fs, index, false, getConf()));
     try {
       while (values.hasNext()) {
         reader.delete(((IntWritable)values.next()).get());
@@ -316,6 +318,7 @@
     job.setInputKeyClass(HashScore.class);
     job.setInputValueClass(IndexDoc.class);
 
+    job.setInt("io.file.buffer.size", 4096);
     job.setMapperClass(DeleteDuplicates.class);
     job.setReducerClass(DeleteDuplicates.class);
 
@@ -331,7 +334,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    DeleteDuplicates dedup = new DeleteDuplicates(NutchConf.get());
+    DeleteDuplicates dedup = new DeleteDuplicates(new NutchConf());
     
     if (args.length < 1) {
       System.err.println("Usage: <indexes> ...");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexMerger.java Tue Jan 31 08:08:58 2006
@@ -41,28 +41,21 @@
 
   public static final String DONE_NAME = "merge.done";
 
-  private int MERGE_FACTOR = NutchConf.get().getInt("indexer.mergeFactor",
-      IndexWriter.DEFAULT_MERGE_FACTOR);
-  private int MIN_MERGE_DOCS = NutchConf.get().getInt("indexer.minMergeDocs",
-      IndexWriter.DEFAULT_MIN_MERGE_DOCS);
-  private int MAX_MERGE_DOCS = NutchConf.get().getInt("indexer.maxMergeDocs",
-      IndexWriter.DEFAULT_MAX_MERGE_DOCS);
-  private int TERM_INDEX_INTERVAL =
-    NutchConf.get().getInt("indexer.termIndexInterval",
-                           IndexWriter.DEFAULT_TERM_INDEX_INTERVAL);
   private NutchFileSystem nfs;
   private File outputIndex;
   private File localWorkingDir;
   private File[] indexes;
+  private NutchConf nutchConf;
 
   /**
    * Merge all of the indexes given
    */
-  public IndexMerger(NutchFileSystem nfs, File[] indexes, File outputIndex, File localWorkingDir) throws IOException {
+  public IndexMerger(NutchFileSystem nfs, File[] indexes, File outputIndex, File localWorkingDir, NutchConf nutchConf) throws IOException {
       this.nfs = nfs;
       this.indexes = indexes;
       this.outputIndex = outputIndex;
       this.localWorkingDir = localWorkingDir;
+      this.nutchConf = nutchConf;
   }
 
   /**
@@ -81,7 +74,7 @@
     Directory[] dirs = new Directory[indexes.length];
     for (int i = 0; i < indexes.length; i++) {
       LOG.info("Adding " + indexes[i]);
-      dirs[i] = new NdfsDirectory(nfs, indexes[i], false);
+      dirs[i] = new NdfsDirectory(nfs, indexes[i], false, this.nutchConf);
     }
 
     //
@@ -90,10 +83,10 @@
     // Merge indices
     //
     IndexWriter writer = new IndexWriter(localOutput, null, true);
-    writer.mergeFactor = MERGE_FACTOR;
-    writer.minMergeDocs = MIN_MERGE_DOCS;
-    writer.maxMergeDocs = MAX_MERGE_DOCS;
-    writer.setTermIndexInterval(TERM_INDEX_INTERVAL);
+    writer.mergeFactor = nutchConf.getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR);
+    writer.minMergeDocs = nutchConf.getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MIN_MERGE_DOCS);
+    writer.maxMergeDocs = nutchConf.getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS);
+    writer.setTermIndexInterval(nutchConf.getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL));
     writer.infoStream = LogFormatter.getLogStream(LOG, Level.FINE);
     writer.setUseCompoundFile(false);
     writer.setSimilarity(new NutchSimilarity());
@@ -121,7 +114,8 @@
     //
     // Parse args, read all index directories to be processed
     //
-    NutchFileSystem nfs = NutchFileSystem.get();
+    NutchConf nutchConf = new NutchConf();
+    NutchFileSystem nfs = NutchFileSystem.get(nutchConf);
     File workDir = new File(new File("").getCanonicalPath());
     List indexDirs = new ArrayList();
 
@@ -146,12 +140,12 @@
     File[] indexFiles = (File[])indexDirs.toArray(new File[indexDirs.size()]);
 
     if (workDir.exists()) {
-      FileUtil.fullyDelete(workDir);
+      FileUtil.fullyDelete(workDir, nutchConf);
     }
     workDir.mkdirs();
-    IndexMerger merger = new IndexMerger(nfs,indexFiles,outputIndex,workDir);
+    IndexMerger merger = new IndexMerger(nfs,indexFiles,outputIndex,workDir, nutchConf);
     merger.merge();
     LOG.info("done merging");
-    FileUtil.fullyDelete(workDir);
+    FileUtil.fullyDelete(workDir, nutchConf);
   }
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexSorter.java Tue Jan 31 08:08:58 2006
@@ -225,14 +225,14 @@
     this.directory = directory;
   }
 
-  public void sort() throws IOException {
+  public void sort(int termIndexInterval) throws IOException {
     IndexReader reader = IndexReader.open(new File(directory, "index"));
 
     SortingReader sorter = new SortingReader(reader, oldToNew(reader));
     IndexWriter writer = new IndexWriter(new File(directory, "index-sorted"),
                                          null, true);
     writer.setTermIndexInterval
-      (NutchConf.get().getInt("indexer.termIndexInterval", 128));
+      (termIndexInterval);
     writer.setUseCompoundFile(false);
     writer.addIndexes(new IndexReader[] { sorter });
     writer.close();
@@ -283,8 +283,9 @@
     IndexSorter sorter = new IndexSorter(directory);
 
     Date start = new Date();
-
-    sorter.sort();
+    NutchConf nutchConf = new NutchConf();
+    int termIndexInterval = nutchConf.getInt("indexer.termIndexInterval", 128);
+    sorter.sort(termIndexInterval);
 
     Date end = new Date();
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/Indexer.java Tue Jan 31 08:08:58 2006
@@ -55,7 +55,7 @@
 
       reporter.setStatus(split.toString());
       
-      return new SequenceFileRecordReader(fs, split) {
+      return new SequenceFileRecordReader(job, split) {
           public synchronized boolean next(Writable key, Writable value)
             throws IOException {
             ObjectWritable wrapper = (ObjectWritable)value;
@@ -83,7 +83,7 @@
 
       final IndexWriter writer =                  // build locally first
         new IndexWriter(fs.startLocalOutput(perm, temp),
-                        new NutchDocumentAnalyzer(), true);
+                        new NutchDocumentAnalyzer(job), true);
 
       writer.mergeFactor = job.getInt("indexer.mergeFactor", 10);
       writer.minMergeDocs = job.getInt("indexer.minMergeDocs", 100);
@@ -92,7 +92,7 @@
       writer.setTermIndexInterval
         (job.getInt("indexer.termIndexInterval", 128));
       writer.maxFieldLength = job.getInt("indexer.max.tokens", 10000);
-      //writer.infoStream = LogFormatter.getLogStream(LOG, Level.FINE);
+      writer.infoStream = LogFormatter.getLogStream(LOG, Level.INFO);
       writer.setUseCompoundFile(false);
       writer.setSimilarity(new NutchSimilarity());
 
@@ -133,6 +133,8 @@
     }
   }
 
+  private IndexingFilters filters;
+
   public Indexer() {
     super(null);
   }
@@ -146,6 +148,8 @@
 
   public void configure(JobConf job) {
     scorePower = job.getFloat("indexer.score.power", 0.5f);
+    setConf(job);
+    this.filters = new IndexingFilters(getConf());
   }
 
   public void reduce(WritableComparable key, Iterator values,
@@ -217,7 +221,7 @@
 
     try {
       // run indexing filters
-      doc = IndexingFilters.filter(doc,new ParseImpl(parseText, parseData), (UTF8)key, fetchDatum, inlinks);
+      doc = this.filters.filter(doc,new ParseImpl(parseText, parseData), (UTF8)key, fetchDatum, inlinks);
     } catch (IndexingException e) {
       LOG.warning("Error indexing "+key+": "+e);
       return;
@@ -261,7 +265,7 @@
   }
 
   public static void main(String[] args) throws Exception {
-    Indexer indexer = new Indexer(NutchConf.get());
+    Indexer indexer = new Indexer(new NutchConf());
     
     if (args.length < 4) {
       System.err.println("Usage: <index> <crawldb> <linkdb> <segment> ...");

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilter.java Tue Jan 31 08:08:58 2006
@@ -18,6 +18,7 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.util.NutchConfigurable;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.io.UTF8;
@@ -26,7 +27,7 @@
  * fields.  All plugins found which implement this extension point are run
  * sequentially on the parse.
  */
-public interface IndexingFilter {
+public interface IndexingFilter extends NutchConfigurable {
   /** The name of the extension point. */
   final static String X_POINT_ID = IndexingFilter.class.getName();
 

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java?rev=373853&r1=373852&r2=373853&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFilters.java Tue Jan 31 08:08:58 2006
@@ -22,6 +22,7 @@
 
 import org.apache.nutch.plugin.*;
 import org.apache.nutch.parse.Parse;
+import org.apache.nutch.util.NutchConf;
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.io.UTF8;
@@ -29,37 +30,38 @@
 /** Creates and caches {@link IndexingFilter} implementing plugins.*/
 public class IndexingFilters {
 
-  private static final IndexingFilter[] CACHE;
-  static {
-    try {
-      ExtensionPoint point = PluginRepository.getInstance()
-        .getExtensionPoint(IndexingFilter.X_POINT_ID);
-      if (point == null)
-        throw new RuntimeException(IndexingFilter.X_POINT_ID+" not found.");
-      Extension[] extensions = point.getExtensions();
-      HashMap filterMap = new HashMap();
-      for (int i = 0; i < extensions.length; i++) {
-        Extension extension = extensions[i];
-        IndexingFilter filter = (IndexingFilter)extension.getExtensionInstance();
-        System.out.println("-adding " + filter.getClass().getName());
-        if (!filterMap.containsKey(filter.getClass().getName())) {
-        	filterMap.put(filter.getClass().getName(), filter);
-        }
-      }
-      CACHE = (IndexingFilter[])filterMap.values().toArray(new IndexingFilter[0]);
-    } catch (PluginRuntimeException e) {
-      throw new RuntimeException(e);
-    }
-  }
+  private IndexingFilter[] indexingFilters;
 
-  private  IndexingFilters() {}                  // no public ctor
+  /** Loads the {@link IndexingFilter} plugins, reusing the array cached in <code>nutchConf</code> if present. */
+  public IndexingFilters(NutchConf nutchConf) {
+    final String cacheKey = IndexingFilter.class.getName();
+    if (nutchConf.getObject(cacheKey) == null) {
+      try {
+        ExtensionPoint point = nutchConf.getPluginRepository().getExtensionPoint(IndexingFilter.X_POINT_ID);
+        if (point == null)
+          throw new RuntimeException(IndexingFilter.X_POINT_ID + " not found.");
+        HashMap byClassName = new HashMap(); // keeps the first instance seen per implementing class
+        Extension[] extensions = point.getExtensions();
+        for (int i = 0; i < extensions.length; i++) {
+          IndexingFilter filter = (IndexingFilter) extensions[i].getExtensionInstance();
+          String className = filter.getClass().getName();
+          System.out.println("-adding " + className);
+          if (!byClassName.containsKey(className))
+            byClassName.put(className, filter);
+        }
+        nutchConf.setObject(cacheKey, (IndexingFilter[]) byClassName.values().toArray(new IndexingFilter[0]));
+      } catch (PluginRuntimeException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    this.indexingFilters = (IndexingFilter[]) nutchConf.getObject(cacheKey);
+  }
 
   /** Run all defined filters. */
-  public static Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks)
+  public Document filter(Document doc, Parse parse, UTF8 url, CrawlDatum datum, Inlinks inlinks)
     throws IndexingException {
-
-    for (int i = 0; i < CACHE.length; i++) {
-      doc = CACHE[i].filter(doc, parse, url, datum, inlinks);
+    for (int i = 0; i < this.indexingFilters.length; i++) {
+      doc = this.indexingFilters[i].filter(doc, parse, url, datum, inlinks);
     }
 
     return doc;