You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by wh...@apache.org on 2006/11/27 21:25:33 UTC

svn commit: r479749 - /lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java

Author: whoschek
Date: Mon Nov 27 12:25:32 2006
New Revision: 479749

URL: http://svn.apache.org/viewvc?view=rev&rev=479749
Log:
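Reverting to the earlier design (a static factory method returning an anonymous analyzer wrapper, with no public clear() method) because of: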
"Actually, now I'm considering reverting back to the version without a public clear() method. The rationale is that this would be less complex and more consistent with the AnalyzerUtil design (simple methods generating simple anonymous analyzer wrappers). If desired, you can still (re)use a single static "child" analyzer instance. It's cheap and easy to create a new caching analyzer on top of the static analyzer, and to do so before each document. The old one will simply be gc'd."

Modified:
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java?view=diff&rev=479749&r1=479748&r2=479749
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/AnalyzerUtil.java Mon Nov 27 12:25:32 2006
@@ -203,10 +203,10 @@
 
   
   /**
-   * Analyzer wrapper that caches all tokens generated by the underlying child analyzer's
+   * Returns an analyzer wrapper that caches all tokens generated by the underlying child analyzer's
    * token streams, and delivers those cached tokens on subsequent calls to 
-   * <code>tokenStream(String fieldName, Reader reader)</code>, 
-   * if the fieldName has been seen before, altogether ignoring the Reader parameter.
+   * <code>tokenStream(String fieldName, Reader reader)</code> 
+   * if the fieldName has been seen before, altogether ignoring the Reader parameter on cache lookup.
    * <p>
    * If Analyzer / TokenFilter chains are expensive in terms of I/O or CPU, such caching can 
    * help improve performance if the same document is added to multiple Lucene indexes, 
@@ -216,61 +216,49 @@
    * <ul>
    * <li>Caching the tokens of large Lucene documents can lead to out of memory exceptions.</li> 
    * <li>The Token instances delivered by the underlying child analyzer must be immutable.</li>
-   * <li>A caching analyzer instance must not be used for more than one document, unless 
-   * <code>clear()</code> is called before each new document.</li>
+   * <li>A caching analyzer instance must not be used for more than one document
+   * because the cache is not keyed on the Reader parameter.</li>
    * </ul>
+   * 
+   * @param child
+   *            the underlying child analyzer
+   * @return a new analyzer
    */
-  public static class TokenCachingAnalyzer extends Analyzer {
-    
-    private final Analyzer child;
-    private final HashMap cache = new HashMap();
-      
-    /**
-     * Creates and returns a new caching analyzer that wraps the given underlying child analyzer.
-     * 
-     * @param child
-     *            the underlying child analyzer
-     * @return a new caching analyzer
-     */
-    public TokenCachingAnalyzer(Analyzer child) {
-      if (child == null)
-        throw new IllegalArgumentException("child analyzer must not be null");
-
-      this.child = child;
-    }
-    
-    /**
-     * Removes all cached data.
-     */
-    public void clear() {
-      cache.clear();
-    }
-
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-      final ArrayList tokens = (ArrayList) cache.get(fieldName);
-      if (tokens == null) { // not yet cached
-        final ArrayList tokens2 = new ArrayList();
-        cache.put(fieldName, tokens2);
-        return new TokenFilter(child.tokenStream(fieldName, reader)) {
-
-          public Token next() throws IOException {
-            Token token = input.next(); // from filter super class
-            if (token != null) tokens2.add(token);
-            return token;
-          }
-        };
-      } else { // already cached
-        return new TokenStream() {
-
-          private Iterator iter = tokens.iterator();
-
-          public Token next() {
-            if (!iter.hasNext()) return null;
-            return (Token) iter.next();
-          }
-        };
+  public static Analyzer getTokenCachingAnalyzer(final Analyzer child) {
+
+    if (child == null)
+      throw new IllegalArgumentException("child analyzer must not be null");
+
+    return new Analyzer() {
+
+      private final HashMap cache = new HashMap();
+
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        final ArrayList tokens = (ArrayList) cache.get(fieldName);
+        if (tokens == null) { // not yet cached
+          final ArrayList tokens2 = new ArrayList();
+          cache.put(fieldName, tokens2);
+          return new TokenFilter(child.tokenStream(fieldName, reader)) {
+
+            public Token next() throws IOException {
+              Token token = input.next(); // from filter super class
+              if (token != null) tokens2.add(token);
+              return token;
+            }
+          };
+        } else { // already cached
+          return new TokenStream() {
+
+            private Iterator iter = tokens.iterator();
+
+            public Token next() {
+              if (!iter.hasNext()) return null;
+              return (Token) iter.next();
+            }
+          };
+        }
       }
-    }
+    };
   }