You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/06/08 00:19:09 UTC

svn commit: r412582 - /lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java

Author: jerome
Date: Wed Jun  7 15:19:08 2006
New Revision: 412582

URL: http://svn.apache.org/viewvc?rev=412582&view=rev
Log:
NUTCH-301: CommonTerms are cached in the Configuration

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java?rev=412582&r1=412581&r2=412582&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java Wed Jun  7 15:19:08 2006
@@ -37,7 +37,10 @@
   private static final Logger LOG =
     LogFormatter.getLogger("org.apache.nutch.analysis.CommonGrams");
   private static final char SEPARATOR = '-';
-  private HashMap COMMON_TERMS = new HashMap();
+  /** The key used to cache commonTerms in Configuration */
+  private static final String KEY = CommonGrams.class.getName();
+
+  private HashMap commonTerms = new HashMap();
   
   /**
    * The constructor.
@@ -135,7 +138,13 @@
 
   /** Construct using the provided config file. */
   private void init(Configuration conf) {
+    // First, try to retrieve some commonTerms cached in configuration.
+    commonTerms = (HashMap) conf.getObject(KEY);
+    if (commonTerms != null) { return; }
+
+    // Otherwise, read the terms.file
     try {
+      commonTerms = new HashMap();
       Reader reader = conf.getConfResourceAsReader
         (conf.get("analysis.common.terms.file"));
       BufferedReader in = new BufferedReader(reader);
@@ -160,13 +169,14 @@
         while ((token = ts.next()) != null) {
           gram = gram + SEPARATOR + token.termText();
         }
-        HashSet table = (HashSet)COMMON_TERMS.get(field);
+        HashSet table = (HashSet)commonTerms.get(field);
         if (table == null) {
           table = new HashSet();
-          COMMON_TERMS.put(field, table);
+          commonTerms.put(field, table);
         }
         table.add(gram);
       }
+      conf.setObject(KEY, commonTerms);
     } catch (IOException e) {
       throw new RuntimeException(e.toString());
     }
@@ -175,7 +185,7 @@
   /** Construct a token filter that inserts n-grams for common terms.  For use
    * while indexing documents.  */
   public TokenFilter getFilter(TokenStream ts, String field) {
-    return new Filter(ts, (HashSet)COMMON_TERMS.get(field));
+    return new Filter(ts, (HashSet)commonTerms.get(field));
   }
 
   /** Utility to convert an array of Query.Terms into a token stream. */