You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by je...@apache.org on 2006/06/08 00:19:09 UTC
svn commit: r412582 -
/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
Author: jerome
Date: Wed Jun 7 15:19:08 2006
New Revision: 412582
URL: http://svn.apache.org/viewvc?rev=412582&view=rev
Log:
NUTCH-301 : CommonTerms are cached in the Configuration
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java?rev=412582&r1=412581&r2=412582&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/analysis/CommonGrams.java Wed Jun 7 15:19:08 2006
@@ -37,7 +37,10 @@
private static final Logger LOG =
LogFormatter.getLogger("org.apache.nutch.analysis.CommonGrams");
private static final char SEPARATOR = '-';
- private HashMap COMMON_TERMS = new HashMap();
+ /** The key used to cache commonTerms in Configuration */
+ private static final String KEY = CommonGrams.class.getName();
+
+ private HashMap commonTerms = new HashMap();
/**
* The constructor.
@@ -135,7 +138,13 @@
/** Construct using the provided config file. */
private void init(Configuration conf) {
+ // First, try to retrieve some commonTerms cached in configuration.
+ commonTerms = (HashMap) conf.getObject(KEY);
+ if (commonTerms != null) { return; }
+
+ // Otherwise, read the terms.file
try {
+ commonTerms = new HashMap();
Reader reader = conf.getConfResourceAsReader
(conf.get("analysis.common.terms.file"));
BufferedReader in = new BufferedReader(reader);
@@ -160,13 +169,14 @@
while ((token = ts.next()) != null) {
gram = gram + SEPARATOR + token.termText();
}
- HashSet table = (HashSet)COMMON_TERMS.get(field);
+ HashSet table = (HashSet)commonTerms.get(field);
if (table == null) {
table = new HashSet();
- COMMON_TERMS.put(field, table);
+ commonTerms.put(field, table);
}
table.add(gram);
}
+ conf.setObject(KEY, commonTerms);
} catch (IOException e) {
throw new RuntimeException(e.toString());
}
@@ -175,7 +185,7 @@
/** Construct a token filter that inserts n-grams for common terms. For use
* while indexing documents. */
public TokenFilter getFilter(TokenStream ts, String field) {
- return new Filter(ts, (HashSet)COMMON_TERMS.get(field));
+ return new Filter(ts, (HashSet)commonTerms.get(field));
}
/** Utility to convert an array of Query.Terms into a token stream. */