You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ma...@apache.org on 2010/03/14 21:58:34 UTC

svn commit: r922957 [1/3] - in /lucene/solr/branches/solr: ./ lib/ src/common/org/apache/solr/common/util/ src/java/org/apache/solr/analysis/ src/java/org/apache/solr/handler/ src/java/org/apache/solr/handler/admin/ src/java/org/apache/solr/handler/com...

Author: markrmiller
Date: Sun Mar 14 20:58:32 2010
New Revision: 922957

URL: http://svn.apache.org/viewvc?rev=922957&view=rev
Log:
a hacky commit of stuff needed to get on lucene 3.0.1

Added:
    lucene/solr/branches/solr/lib/lucene-analyzers-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-collation-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-core-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-fast-vector-highlighter-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-highlighter-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-memory-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-misc-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-queries-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-snowball-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-spatial-3.0.1.jar   (with props)
    lucene/solr/branches/solr/lib/lucene-spellchecker-3.0.1.jar   (with props)
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/WordDelimiterIterator.java
Removed:
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/LengthFilter.java
Modified:
    lucene/solr/branches/solr/common-build.xml
    lucene/solr/branches/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HTMLStripStandardTokenizerFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianCommon.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
    lucene/solr/branches/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
    lucene/solr/branches/solr/src/java/org/apache/solr/handler/component/TermVectorComponent.java
    lucene/solr/branches/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/response/BaseResponseWriter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/response/BinaryResponseWriter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/schema/CompressableField.java
    lucene/solr/branches/solr/src/java/org/apache/solr/schema/FieldType.java
    lucene/solr/branches/solr/src/java/org/apache/solr/schema/TrieDateField.java
    lucene/solr/branches/solr/src/java/org/apache/solr/schema/TrieField.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/DocSetHitCollector.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/PrefixFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/QueryResultKey.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/SolrIndexReader.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/SolrQueryParser.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/WildcardFilter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/search/function/FunctionQuery.java
    lucene/solr/branches/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
    lucene/solr/branches/solr/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java
    lucene/solr/branches/solr/src/java/org/apache/solr/spelling/IndexBasedSpellChecker.java
    lucene/solr/branches/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/tst/OldRequestHandler.java
    lucene/solr/branches/solr/src/java/org/apache/solr/tst/TestRequestHandler.java
    lucene/solr/branches/solr/src/java/org/apache/solr/update/SolrIndexWriter.java
    lucene/solr/branches/solr/src/java/org/apache/solr/update/UpdateHandler.java
    lucene/solr/branches/solr/src/test/org/apache/solr/BasicFunctionalityTest.java
    lucene/solr/branches/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
    lucene/solr/branches/solr/src/test/org/apache/solr/core/AlternateDirectoryTest.java
    lucene/solr/branches/solr/src/test/org/apache/solr/core/TestArbitraryIndexDir.java
    lucene/solr/branches/solr/src/test/org/apache/solr/highlight/HighlighterTest.java
    lucene/solr/branches/solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
    lucene/solr/branches/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
    lucene/solr/branches/solr/src/test/org/apache/solr/update/DirectUpdateHandlerOptimizeTest.java
    lucene/solr/branches/solr/src/test/org/apache/solr/update/DirectUpdateHandlerTest.java
    lucene/solr/branches/solr/src/test/org/apache/solr/util/TestCharArrayMap.java
    lucene/solr/branches/solr/src/test/org/apache/solr/util/TestOpenBitSet.java

Modified: lucene/solr/branches/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/common-build.xml?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/common-build.xml (original)
+++ lucene/solr/branches/solr/common-build.xml Sun Mar 14 20:58:32 2010
@@ -114,7 +114,7 @@
        The version suffix of the Lucene artifacts checked into "lib" 
        IF YOU CHANGE THIS, SANITY CHECK "javadoc.link.lucene"
   -->
-  <property name="lucene_version" value="2.9.2"/>
+  <property name="lucene_version" value="3.0.1"/>
   <!-- The version number to assign to the Maven artifacts. -->
   <property name="maven_version" value="1.5-SNAPSHOT"/>
 

Added: lucene/solr/branches/solr/lib/lucene-analyzers-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-analyzers-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-analyzers-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-collation-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-collation-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-collation-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-core-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-core-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-core-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-fast-vector-highlighter-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-fast-vector-highlighter-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-fast-vector-highlighter-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-highlighter-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-highlighter-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-highlighter-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-memory-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-memory-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-memory-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-misc-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-misc-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-misc-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-queries-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-queries-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-queries-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-snowball-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-snowball-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-snowball-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-spatial-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-spatial-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-spatial-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/branches/solr/lib/lucene-spellchecker-3.0.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/lib/lucene-spellchecker-3.0.1.jar?rev=922957&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/branches/solr/lib/lucene-spellchecker-3.0.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/solr/branches/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java (original)
+++ lucene/solr/branches/solr/src/common/org/apache/solr/common/util/ConcurrentLRUCache.java Sun Mar 14 20:58:32 2010
@@ -366,12 +366,12 @@ public class ConcurrentLRUCache<K,V> {
     // necessary because maxSize is private in base class
     public Object myInsertWithOverflow(Object element) {
       if (size() < myMaxSize) {
-        put(element);
+        add(element);
         return null;
       } else if (size() > 0 && !lessThan(element, heap[1])) {
         Object ret = heap[1];
         heap[1] = element;
-        adjustTop();
+        updateTop();
         return ret;
       } else {
         return element;

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java Sun Mar 14 20:58:32 2010
@@ -20,6 +20,13 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource; // javadoc @link
 
 import java.io.IOException;
 import java.util.LinkedList;
@@ -56,13 +63,23 @@ import java.util.LinkedList;
  * responsibility of the implementing subclass. In the "A" "B" => "A" "A" "B"
  * example above, the subclass must clone the additional "A" it creates.
  * 
- * @version $Id$
+ * @deprecated This class does not support custom attributes. Extend TokenFilter instead,
+ * using {@link AttributeSource#captureState()} and {@link AttributeSource#restoreState()}
+ * which support all attributes.
  */
+@Deprecated
 public abstract class BufferedTokenStream extends TokenFilter {
   // in the future, might be faster if we implemented as an array based CircularQueue
   private final LinkedList<Token> inQueue = new LinkedList<Token>();
   private final LinkedList<Token> outQueue = new LinkedList<Token>();
 
+  private final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = (TypeAttribute) addAttribute(TypeAttribute.class);
+  private final FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute(FlagsAttribute.class);
+  private final PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute(PayloadAttribute.class);
+  private final PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  
   public BufferedTokenStream(TokenStream input) {
     super(input);
   }
@@ -77,13 +94,13 @@ public abstract class BufferedTokenStrea
    */
   protected abstract Token process(Token t) throws IOException;
 
-  public final Token next() throws IOException {
+  public final boolean incrementToken() throws IOException {
     while (true) {
-      if (!outQueue.isEmpty()) return outQueue.removeFirst();
+      if (!outQueue.isEmpty()) return writeToken(outQueue.removeFirst());
       Token t = read();
-      if (null == t) return null;
+      if (null == t) return false;
       Token out = process(t);
-      if (null != out) return out;
+      if (null != out) return writeToken(out);
       // loop back to top in case process() put something on the output queue
     }
   }
@@ -94,7 +111,7 @@ public abstract class BufferedTokenStrea
    */
   protected Token read() throws IOException {
     if (inQueue.isEmpty()) {
-      Token t = input.next();
+      Token t = readToken();
       return t;
     }
     return inQueue.removeFirst();
@@ -120,13 +137,41 @@ public abstract class BufferedTokenStrea
   protected Token peek(int n) throws IOException {
     int fillCount = n-inQueue.size();
     for (int i=0; i < fillCount; i++) {
-      Token t = input.next();
+      Token t = readToken();
       if (null==t) return null;
       inQueue.addLast(t);
     }
     return inQueue.get(n-1);
   }
 
+  /** old api emulation for back compat */
+  private Token readToken() throws IOException {
+    if (!input.incrementToken()) {
+      return null;
+    } else {
+      Token token = new Token();
+      token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+      token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
+      token.setType(typeAtt.type());
+      token.setFlags(flagsAtt.getFlags());
+      token.setPositionIncrement(posIncAtt.getPositionIncrement());
+      token.setPayload(payloadAtt.getPayload());
+      return token;
+    }
+  }
+  
+  /** old api emulation for back compat */
+  private boolean writeToken(Token token) throws IOException {
+    clearAttributes();
+    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+    offsetAtt.setOffset(token.startOffset(), token.endOffset());
+    typeAtt.setType(token.type());
+    flagsAtt.setFlags(token.getFlags());
+    posIncAtt.setPositionIncrement(token.getPositionIncrement());
+    payloadAtt.setPayload(token.getPayload());
+    return true;
+  }
+  
   /**
    * Write a token to the buffered output stream
    */

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java Sun Mar 14 20:58:32 2010
@@ -14,20 +14,22 @@ import java.util.Arrays;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /*
- * TODO: Rewrite to use new TokenStream api from lucene 2.9 when BufferedTokenStream uses it.
- * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and
- * associated constructors 
+ * TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors 
  */
 
 /**
  * Construct bigrams for frequently occurring terms while indexing. Single terms
  * are still indexed too, with bigrams overlaid. This is achieved through the
- * use of {@link Token#setPositionIncrement(int)}. Bigrams have a type
- * of "gram" Example
+ * use of {@link PositionIncrementAttribute#setPositionIncrement(int)}. Bigrams have a type
+ * of {@link #GRAM_TYPE} Example:
  * <ul>
  * <li>input:"the quick brown fox"</li>
  * <li>output:|"the","the-quick"|"brown"|"fox"|</li>
@@ -40,14 +42,23 @@ import org.apache.lucene.analysis.TokenS
 /*
  * Constructors and makeCommonSet based on similar code in StopFilter
  */
+public final class CommonGramsFilter extends TokenFilter {
 
-public class CommonGramsFilter extends BufferedTokenStream {
-
+  static final String GRAM_TYPE = "gram";
   private static final char SEPARATOR = '_';
 
   private final CharArraySet commonWords;
 
-  private StringBuilder buffer = new StringBuilder();
+  private final StringBuilder buffer = new StringBuilder();
+  
+  private final TermAttribute termAttribute = (TermAttribute) addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAttribute = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAttribute = (TypeAttribute) addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncAttribute = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+
+  private int lastStartOffset;
+  private boolean lastWasCommon;
+  private State savedState;
 
   /**
    * Construct a token stream filtering the given input using a Set of common
@@ -57,7 +68,6 @@ public class CommonGramsFilter extends B
    * 
    * @param input TokenStream input in filter chain
    * @param commonWords The set of common words.
-   * 
    */
   public CommonGramsFilter(TokenStream input, Set commonWords) {
     this(input, commonWords, false);
@@ -80,8 +90,7 @@ public class CommonGramsFilter extends B
    * @param commonWords The set of common words.
    * @param ignoreCase -Ignore case when constructing bigrams for common words.
    */
-  public CommonGramsFilter(TokenStream input, Set commonWords,
-      boolean ignoreCase) {
+  public CommonGramsFilter(TokenStream input, Set commonWords, boolean ignoreCase) {
     super(input);
     if (commonWords instanceof CharArraySet) {
       this.commonWords = (CharArraySet) commonWords;
@@ -89,7 +98,6 @@ public class CommonGramsFilter extends B
       this.commonWords = new CharArraySet(commonWords.size(), ignoreCase);
       this.commonWords.addAll(commonWords);
     }
-    init();
   }
 
   /**
@@ -101,7 +109,6 @@ public class CommonGramsFilter extends B
    */
   public CommonGramsFilter(TokenStream input, String[] commonWords) {
     this(input, commonWords, false);
-    init();
   }
 
   /**
@@ -112,33 +119,21 @@ public class CommonGramsFilter extends B
    * @param commonWords words to be used in constructing bigrams
    * @param ignoreCase -Ignore case when constructing bigrams for common words.
    */
-  public CommonGramsFilter(TokenStream input, String[] commonWords,
-      boolean ignoreCase) {
+  public CommonGramsFilter(TokenStream input, String[] commonWords, boolean ignoreCase) {
     super(input);
-    this.commonWords = (CharArraySet) makeCommonSet(commonWords, ignoreCase);
-    init();
-  }
-
-  // Here for future moving to 2.9 api See StopFilter code
-
-  public void init() {
-    /**
-     * termAtt = (TermAttribute) addAttribute(TermAttribute.class); posIncrAtt
-     * =(PositionIncrementAttribute)
-     * addAttribute(PositionIncrementAttribute.class); typeAdd =(TypeAttribute)
-     * addAttribute(TypeAttribute.class);
-     */
+    this.commonWords = makeCommonSet(commonWords, ignoreCase);
   }
 
   /**
    * Build a CharArraySet from an array of common words, appropriate for passing
    * into the CommonGramsFilter constructor. This permits this commonWords
    * construction to be cached once when an Analyzer is constructed.
-   * 
-   * @see #makeCommonSet(java.lang.String[], boolean) passing false to
-   *      ignoreCase
+   *
+   * @param commonWords Array of common words which will be converted into the CharArraySet
+   * @return CharArraySet of the given words, appropriate for passing into the CommonGramFilter constructor
+   * @see #makeCommonSet(java.lang.String[], boolean) passing false to ignoreCase
    */
-  public static final CharArraySet makeCommonSet(String[] commonWords) {
+  public static CharArraySet makeCommonSet(String[] commonWords) {
     return makeCommonSet(commonWords, false);
   }
 
@@ -147,12 +142,11 @@ public class CommonGramsFilter extends B
    * into the CommonGramsFilter constructor,case-sensitive if ignoreCase is
    * false.
    * 
-   * @param commonWords
+   * @param commonWords Array of common words which will be converted into the CharArraySet
    * @param ignoreCase If true, all words are lower cased first.
    * @return a Set containing the words
    */
-  public static final CharArraySet makeCommonSet(String[] commonWords,
-      boolean ignoreCase) {
+  public static CharArraySet makeCommonSet(String[] commonWords, boolean ignoreCase) {
     CharArraySet commonSet = new CharArraySet(commonWords.length, ignoreCase);
     commonSet.addAll(Arrays.asList(commonWords));
     return commonSet;
@@ -163,61 +157,95 @@ public class CommonGramsFilter extends B
    * output the token. If the token and/or the following token are in the list
    * of common words also output a bigram with position increment 0 and
    * type="gram"
-   */
-  /*
-   * TODO: implement new lucene 2.9 API incrementToken() instead of deprecated
-   * Token.next() TODO:Consider adding an option to not emit unigram stopwords
+   *
+   * TODO:Consider adding an option to not emit unigram stopwords
    * as in CDL XTF BigramStopFilter, CommonGramsQueryFilter would need to be
-   * changed to work with this. TODO: Consider optimizing for the case of three
+   * changed to work with this.
+   *
+   * TODO: Consider optimizing for the case of three
    * commongrams i.e "man of the year" normally produces 3 bigrams: "man-of",
    * "of-the", "the-year" but with proper management of positions we could
    * eliminate the middle bigram "of-the"and save a disk seek and a whole set of
    * position lookups.
    */
-  public Token process(Token token) throws IOException {
-    Token next = peek(1);
-    // if this is the last token just spit it out. Any commongram would have
-    // been output in the previous call
-    if (next == null) {
-      return token;
+  public boolean incrementToken() throws IOException {
+    // get the next piece of input
+    if (savedState != null) {
+      restoreState(savedState);
+      savedState = null;
+      saveTermBuffer();
+      return true;
+    } else if (!input.incrementToken()) {
+        return false;
     }
-
-    /**
-     * if this token or next are common then construct a bigram with type="gram"
-     * position increment = 0, and put it in the output queue. It will be
-     * returned when super.next() is called, before this method gets called with
-     * a new token from the input stream See implementation of next() in
-     * BufferedTokenStream
+    
+    /* We build n-grams before and after stopwords. 
+     * When valid, the buffer always contains at least the separator.
+     * If its empty, there is nothing before this stopword.
      */
-
-    if (isCommon(token) || isCommon(next)) {
-      Token gram = gramToken(token, next);
-      write(gram);
+    if (lastWasCommon || (isCommon() && buffer.length() > 0)) {
+      savedState = captureState();
+      gramToken();
+      return true;      
     }
-    // we always return the unigram token
-    return token;
+
+    saveTermBuffer();
+    return true;
   }
 
-  /** True if token is for a common term. */
-  private boolean isCommon(Token token) {
-    return commonWords != null
-        && commonWords.contains(token.termBuffer(), 0, token.termLength());
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    lastWasCommon = false;
+    savedState = null;
+    buffer.setLength(0);
   }
 
-  /** Construct a compound token. */
-  private Token gramToken(Token first, Token second) {
+  // ================================================= Helper Methods ================================================
+
+  /**
+   * Determines if the current token is a common term
+   *
+   * @return {@code true} if the current token is a common term, {@code false} otherwise
+   */
+  private boolean isCommon() {
+    return commonWords != null && commonWords.contains(termAttribute.termBuffer(), 0, termAttribute.termLength());
+  }
+
+  /**
+   * Saves this information to form the left part of a gram
+   */
+  private void saveTermBuffer() {
     buffer.setLength(0);
-    buffer.append(first.termText());
+    buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
     buffer.append(SEPARATOR);
-    buffer.append(second.termText());
-    Token result = new Token(buffer.toString(), first.startOffset(), second
-        .endOffset(), "gram");
-    result.setPositionIncrement(0);
-    return result;
+    lastStartOffset = offsetAttribute.startOffset();
+    lastWasCommon = isCommon();
   }
-  
-  public void reset() throws IOException {
-    super.reset();
+
+  /**
+   * Constructs a compound token.
+   */
+  private void gramToken() {
+    buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
+    int endOffset = offsetAttribute.endOffset();
+
+    clearAttributes();
+
+    int length = buffer.length();
+    char termText[] = termAttribute.termBuffer();
+    if (length > termText.length) {
+      termText = termAttribute.resizeTermBuffer(length);
+    }
+    
+    buffer.getChars(0, length, termText, 0);
+    termAttribute.setTermLength(length);
+    posIncAttribute.setPositionIncrement(0);
+    offsetAttribute.setOffset(lastStartOffset, endOffset);
+    typeAttribute.setType(GRAM_TYPE);
     buffer.setLength(0);
   }
 }

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -57,7 +57,7 @@ public class CommonGramsFilterFactory ex
         throw new RuntimeException(e);
       }
     } else {
-      commonWords = (CharArraySet) CommonGramsFilter.makeCommonSet(StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase);
+      commonWords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
     }
   }
       

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilter.java Sun Mar 14 20:58:32 2010
@@ -18,8 +18,11 @@ package org.apache.solr.analysis;
 
 import java.io.IOException;
 
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+import static org.apache.solr.analysis.CommonGramsFilter.GRAM_TYPE;
 
 /**
  * Wrap a CommonGramsFilter optimizing phrase queries by only returning single
@@ -36,33 +39,36 @@ import org.apache.lucene.analysis.Token;
  */
 
 /*
- * TODO: When org.apache.solr.analysis.BufferedTokenStream is changed to use the
- * 2.9 lucene TokenStream api, make necessary changes here.
  * See:http://hudson.zones
  * .apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache
  * /lucene/analysis/TokenStream.html and
  * http://svn.apache.org/viewvc/lucene/java
  * /trunk/src/java/org/apache/lucene/analysis/package.html?revision=718798
  */
-public class CommonGramsQueryFilter extends BufferedTokenStream {
-  //private CharArraySet commonWords;
-  private Token prev;
+public final class CommonGramsQueryFilter extends TokenFilter {
+
+  private final TypeAttribute typeAttribute = (TypeAttribute) addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncAttribute = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  
+  private State previous;
+  private String previousType;
 
   /**
-   * Constructor
-   * 
-   * @param input must be a CommonGramsFilter!
+   * Constructs a new CommonGramsQueryFilter based on the provided CommonGramsFilter 
    * 
+   * @param input CommonGramsFilter the QueryFilter will use
    */
-
   public CommonGramsQueryFilter(CommonGramsFilter input) {
     super(input);
-    prev = new Token();
   }
-  
+
+  /**
+   * {@inheritDoc}
+   */
   public void reset() throws IOException {
     super.reset();
-    prev = new Token();
+    previous = null;
+    previousType = null;
   }
   
   /**
@@ -71,68 +77,47 @@ public class CommonGramsQueryFilter exte
    * <ul>
    * <li>input: "the rain in spain falls mainly"
    * <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
+   * </ul>
    */
-
-  public Token process(Token token) throws IOException {
-    Token next = peek(1);
-    /*
-     * Deal with last token (next=null when current token is the last word) Last
-     * token will be a unigram. If previous token was a bigram, then we already
-     * output the last token as part of the unigram and should not additionally
-     * output the unigram. <p> Example: If the end of the input to the
-     * CommonGramsFilter is "...the plain" <ul> <li>current token = "plain"</li>
-     * <li>next token = null</li> <li>previous token = "the-plain" (bigram)</li>
-     * <li> Since the word "plain" was already output as part of the bigram we
-     * don't output it.</li> </ul> Example: If the end of the input to the
-     * CommonGramsFilter is "falls mainly" <ul> <li>current token =
-     * "mainly"</li> <li>next token = null</li> <li>previous token = "falls"
-     * (unigram)</li> <li>Since we haven't yet output the current token, we
-     * output it</li> </ul>
-     */
-
-    // Deal with special case of last token
-    if (next == null) {
-      if (prev == null) {
-        // This is the first and only token i.e. one word query
-        return token;
-      }
-      if (prev != null && prev.type() != "gram") {
-        // If previous token was a unigram, output the current token
-        return token;
-      } else {
-        // If previous token was a bigram, we already output it and this token
-        // was output as part of the bigram so we are done.
-        return null;
+  public boolean incrementToken() throws IOException {
+    while (input.incrementToken()) {
+      State current = captureState();
+
+      if (previous != null && !isGramType()) {
+        restoreState(previous);
+        previous = current;
+        previousType = typeAttribute.type();
+        
+        if (isGramType()) {
+          posIncAttribute.setPositionIncrement(1);
+        }
+        return true;
       }
+
+      previous = current;
     }
 
-    /*
-     * Possible cases are: |token |next 1|word |gram 2|word |word The
-     * CommonGramsFilter we are wrapping always outputs the unigram word prior
-     * to outputting an optional bigram: "the sound of" gets output as |"the",
-     * "the_sound"|"sound", "sound_of" For case 1 we consume the gram from the
-     * input stream and output it rather than the current token This means that
-     * the call to super.next() which reads a token from input and passes it on
-     * to this process method will always get a token of type word
-     */
-    if (next != null && next.type() == "gram") {
-      // consume "next" token from list and output it
-      token = read();
-      // use this to clone the token because clone requires all these args but
-      // won't take the token.type
-      // see
-      // http://hudson.zones.apache.org/hudson/job/Lucene-trunk/javadoc//all/org/apache/lucene/analysis/Token.html
-      prev.reinit(token.termBuffer(), 0, token.termLength(), token
-          .startOffset(), token.endOffset(), token.type());
-      token.setPositionIncrement(1);
-      return token;
+    if (previous == null || GRAM_TYPE.equals(previousType)) {
+      return false;
     }
+    
+    restoreState(previous);
+    previous = null;
+    
+    if (isGramType()) {
+      posIncAttribute.setPositionIncrement(1);
+    }
+    return true;
+  }
 
-    // if the next token is not a bigram, then output the token
-    // see note above regarding this method of copying token to prev
-    prev.reinit(token.termBuffer(), 0, token.termLength(), token.startOffset(),
-        token.endOffset(), token.type());
-    assert token.type() == "word";
-    return token;
+  // ================================================= Helper Methods ================================================
+
+  /**
+   * Convenience method to check if the current type is a gram type
+   * 
+   * @return {@code true} if the current type is a gram type, {@code false} otherwise
+   */
+  public boolean isGramType() {
+    return GRAM_TYPE.equals(typeAttribute.type());
   }
 }

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/CommonGramsQueryFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -59,8 +59,7 @@ public class CommonGramsQueryFilterFacto
         throw new RuntimeException(e);
       }
     } else {
-      commonWords = (CharArraySet) CommonGramsFilter.makeCommonSet(
-          StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase);
+      commonWords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
     }
   }
 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -23,7 +23,6 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.el.GreekCharsets;
 import org.apache.lucene.analysis.el.GreekLowerCaseFilter;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
@@ -32,40 +31,16 @@ import org.slf4j.LoggerFactory;
 
 public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory 
 {
-  @Deprecated
-  private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
-  static {
-    CHARSETS.put("UnicodeGreek",GreekCharsets.UnicodeGreek);
-    CHARSETS.put("ISO",GreekCharsets.ISO);
-    CHARSETS.put("CP1253",GreekCharsets.CP1253);
-  }
-  
-  private char[] charset = GreekCharsets.UnicodeGreek;
 
   private static Logger logger = LoggerFactory.getLogger(GreekLowerCaseFilterFactory.class);
   
   @Override
   public void init(Map<String, String> args) {
     super.init(args);
-    String charsetName = args.get("charset");
-    if (null != charsetName) {
-      charset = CHARSETS.get(charsetName);
-      if (charset.equals(GreekCharsets.UnicodeGreek))
-        logger.warn("Specifying UnicodeGreek is no longer required (default).  "
-            + "Use of the charset parameter will cause an error in Solr 1.5");
-      else
-        logger.warn("Support for this custom encoding is deprecated.  "
-            + "Use of the charset parameter will cause an error in Solr 1.5");
-    } else {
-      charset = GreekCharsets.UnicodeGreek; /* default to unicode */
-    }
-    if (null == charset) {
-      throw new SolrException(ErrorCode.SERVER_ERROR,
-                              "Don't understand charset: " + charsetName);
-    }
+
   }
   public GreekLowerCaseFilter create(TokenStream in) {
-    return new GreekLowerCaseFilter(in,charset);
+    return new GreekLowerCaseFilter(in);
   }
 }
 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HTMLStripStandardTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HTMLStripStandardTokenizerFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HTMLStripStandardTokenizerFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HTMLStripStandardTokenizerFactory.java Sun Mar 14 20:58:32 2010
@@ -20,6 +20,7 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
 
 import java.io.Reader;
 import java.io.IOException;
@@ -31,11 +32,6 @@ import java.io.IOException;
 @Deprecated
 public class HTMLStripStandardTokenizerFactory extends BaseTokenizerFactory {
   public Tokenizer create(Reader input) {
-    return new StandardTokenizer(new HTMLStripReader(input)) {
-      @Override
-      public void reset(Reader reader) throws IOException {
-        super.reset(new HTMLStripReader(reader));
-      }
-    };
+    return new StandardTokenizer(Version.LUCENE_24, new HTMLStripReader(input));
   }
 }

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java Sun Mar 14 20:58:32 2010
@@ -20,6 +20,8 @@ package org.apache.solr.analysis;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 /**
  * When the plain text is extracted from documents, we will often have many words hyphenated and broken into
@@ -52,46 +54,89 @@ import org.apache.lucene.analysis.*;
  */
 public final class HyphenatedWordsFilter extends TokenFilter {
 
-	public HyphenatedWordsFilter(TokenStream in) {
-		super(in);
-	}
+  private final TermAttribute termAttribute = (TermAttribute) addAttribute(TermAttribute.class);
+  private final OffsetAttribute offsetAttribute = (OffsetAttribute) addAttribute(OffsetAttribute.class);
+  
+  private final StringBuilder hyphenated = new StringBuilder();
+  private State savedState;
 
+  /**
+   * Creates a new HyphenatedWordsFilter
+   *
+   * @param in TokenStream that will be filtered
+   */
+  public HyphenatedWordsFilter(TokenStream in) {
+    super(in);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    while (input.incrementToken()) {
+      char[] term = termAttribute.termBuffer();
+      int termLength = termAttribute.termLength();
+      
+      if (termLength > 0 && term[termLength - 1] == '-') {
+        // a hyphenated word
+        // capture the state of the first token only
+        if (savedState == null) {
+          savedState = captureState();
+        }
+        hyphenated.append(term, 0, termLength - 1);
+      } else if (savedState == null) {
+        // not part of a hyphenated word.
+        return true;
+      } else {
+        // the final portion of a hyphenated word
+        hyphenated.append(term, 0, termLength);
+        unhyphenate();
+        return true;
+      }
+    }
+    
+    if (savedState != null) {
+      // the final term ends with a hyphen
+      // add back the hyphen, for backwards compatibility.
+      hyphenated.append('-');
+      unhyphenate();
+      return true;
+    }
+    
+    return false;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    hyphenated.setLength(0);
+    savedState = null;
+  }
 
+  // ================================================= Helper Methods ================================================
 
   /**
-	 * @inheritDoc
-	 * @see org.apache.lucene.analysis.TokenStream#next()
-	 */
-	public final Token next(Token in) throws IOException {
-		StringBuilder termText = new StringBuilder(25);
-		int startOffset = -1, firstPositionIncrement = -1, wordsMerged = 0;
-		Token lastToken = null;
-		for (Token token = input.next(in); token != null; token = input.next()) {
-			termText.append(token.termBuffer(), 0, token.termLength());
-			//current token ends with hyphen -> grab the next token and glue them together
-			if (termText.charAt(termText.length() - 1) == '-') {
-				wordsMerged++;
-				//remove the hyphen
-				termText.setLength(termText.length()-1);
-				if (startOffset == -1) {
-					startOffset = token.startOffset();
-					firstPositionIncrement = token.getPositionIncrement();
-				}
-				lastToken = token;
-			} else {
-				//shortcut returns token
-				if (wordsMerged == 0)
-					return token;
-				Token mergedToken = new Token(termText.toString(), startOffset, token.endOffset(), token.type());
-				mergedToken.setPositionIncrement(firstPositionIncrement);
-				return mergedToken;
-			}
-		}
-		//last token ending with hyphen? - we know that we have only one token in
-		//this situation, so we can safely return firstToken
-		if (startOffset != -1)
-			return lastToken;
-		else
-			return null; //end of token stream
-	}
+   * Writes the joined unhyphenated term
+   */
+  private void unhyphenate() {
+    int endOffset = offsetAttribute.endOffset();
+    
+    restoreState(savedState);
+    savedState = null;
+    
+    char term[] = termAttribute.termBuffer();
+    int length = hyphenated.length();
+    if (length > termAttribute.termLength()) {
+      term = termAttribute.resizeTermBuffer(length);
+    }
+    
+    hyphenated.getChars(0, length, term, 0);
+    termAttribute.setTermLength(length);
+    offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);
+    hyphenated.setLength(0);
+  }
 }

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/KeepWordFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -75,7 +75,7 @@ public class KeepWordFilterFactory exten
   }
 
   public KeepWordFilter create(TokenStream input) {
-    return new KeepWordFilter(input, words, ignoreCase);
+    return new KeepWordFilter(input, (Set)words, ignoreCase);
   }
 
   public CharArraySet getWords() {

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java Sun Mar 14 20:58:32 2010
@@ -17,41 +17,69 @@
 
 package org.apache.solr.analysis;
 
-import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.solr.util.ArraysUtils;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.solr.util.CharArrayMap;
 
 import java.io.IOException;
 
 /**
- * A TokenFilter which filters out Tokens at the same position and Term
- * text as the previous token in the stream.
+ * A TokenFilter which filters out Tokens at the same position and Term text as the previous token in the stream.
  */
-public class RemoveDuplicatesTokenFilter extends BufferedTokenStream {
-  public RemoveDuplicatesTokenFilter(TokenStream input) {super(input);}
-  protected Token process(Token t) throws IOException {
-    Token tok = read();
-    while (tok != null && tok.getPositionIncrement()==0) {
-      if (null != t) {
-        write(t);
-        t = null;
-      }
-      boolean dup=false;
-      for (Token outTok : output()) {
-        int tokLen = tok.termLength();
-        if (outTok.termLength() == tokLen && ArraysUtils.equals(outTok.termBuffer(), 0, tok.termBuffer(), 0, tokLen)) {
-          dup=true;
-          //continue;;
-        }
+public final class RemoveDuplicatesTokenFilter extends TokenFilter {
+
+  private final TermAttribute termAttribute = (TermAttribute) addAttribute(TermAttribute.class);
+  private final PositionIncrementAttribute posIncAttribute =  (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
+  
+  // keep a seen 'set' after each term with posInc > 0
+  // for now use CharArrayMap vs CharArraySet, as it has clear()
+  private final CharArrayMap<Boolean> previous = new CharArrayMap<Boolean>(8, false);
+
+  /**
+   * Creates a new RemoveDuplicatesTokenFilter
+   *
+   * @param in TokenStream that will be filtered
+   */
+  public RemoveDuplicatesTokenFilter(TokenStream in) {
+    super(in);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean incrementToken() throws IOException {
+    while (input.incrementToken()) {
+      final char term[] = termAttribute.termBuffer();
+      final int length = termAttribute.termLength();
+      final int posIncrement = posIncAttribute.getPositionIncrement();
+      
+      if (posIncrement > 0) {
+        previous.clear();
       }
-      if (!dup){
-        write(tok);
+      
+      boolean duplicate = (posIncrement == 0 && previous.get(term, 0, length) != null);
+      
+      // clone the term, and add to the set of seen terms.
+      char saved[] = new char[length];
+      System.arraycopy(term, 0, saved, 0, length);
+      previous.put(saved, Boolean.TRUE);
+      
+      if (!duplicate) {
+        return true;
       }
-      tok = read();
-    }
-    if (tok != null) {
-      pushBack(tok);
     }
-    return t;
+    return false;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    previous.clear();
   }
 } 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianCommon.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianCommon.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianCommon.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianCommon.java Sun Mar 14 20:58:32 2010
@@ -16,46 +16,46 @@
  * limitations under the License.
  */
 
-package org.apache.solr.analysis;
-import org.apache.lucene.analysis.ru.*;
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.solr.core.SolrConfig;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@Deprecated
-public class RussianCommon {
-  
-  private static Logger logger = LoggerFactory.getLogger(RussianCommon.class);
-  
-  private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
-  static {
-    CHARSETS.put("UnicodeRussian",RussianCharsets.UnicodeRussian);
-    CHARSETS.put("KOI8",RussianCharsets.KOI8);
-    CHARSETS.put("CP1251",RussianCharsets.CP1251);
-  }
-  
-  public static char[] getCharset(String name) {
-    if (null == name)
-      return RussianCharsets.UnicodeRussian;
-
-    char[] charset = CHARSETS.get(name);
-    
-    if (charset.equals(RussianCharsets.UnicodeRussian))
-      logger.warn("Specifying UnicodeRussian is no longer required (default).  "
-          + "Use of the charset parameter will cause an error in Solr 1.5");
-    else
-      logger.warn("Support for this custom encoding is deprecated.  "
-          + "Use of the charset parameter will cause an error in Solr 1.5");
-    
-    if (null == charset) {
-      throw new SolrException(ErrorCode.SERVER_ERROR,
-                              "Don't understand charset: " + name);
-    }
-    return charset;
-  }
-}
+//package org.apache.solr.analysis;
+//import org.apache.lucene.analysis.ru.*;
+//import java.util.Map;
+//import java.util.HashMap;
+//import org.apache.solr.core.SolrConfig;
+//import org.apache.solr.common.SolrException;
+//import org.apache.solr.common.SolrException.ErrorCode;
+//import org.slf4j.Logger;
+//import org.slf4j.LoggerFactory;
+//
+//@Deprecated
+//public class RussianCommon {
+//  
+//  private static Logger logger = LoggerFactory.getLogger(RussianCommon.class);
+//  
+//  private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
+//  static {
+//    CHARSETS.put("UnicodeRussian",RussianCharsets.UnicodeRussian);
+//    CHARSETS.put("KOI8",RussianCharsets.KOI8);
+//    CHARSETS.put("CP1251",RussianCharsets.CP1251);
+//  }
+//  
+//  public static char[] getCharset(String name) {
+//    if (null == name)
+//      return RussianCharsets.UnicodeRussian;
+//
+//    char[] charset = CHARSETS.get(name);
+//    
+//    if (charset.equals(RussianCharsets.UnicodeRussian))
+//      logger.warn("Specifying UnicodeRussian is no longer required (default).  "
+//          + "Use of the charset parameter will cause an error in Solr 1.5");
+//    else
+//      logger.warn("Support for this custom encoding is deprecated.  "
+//          + "Use of the charset parameter will cause an error in Solr 1.5");
+//    
+//    if (null == charset) {
+//      throw new SolrException(ErrorCode.SERVER_ERROR,
+//                              "Don't understand charset: " + name);
+//    }
+//    return charset;
+//  }
+//}
 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java Sun Mar 14 20:58:32 2010
@@ -23,17 +23,10 @@ import java.util.Map;
 import org.apache.lucene.analysis.ru.RussianLetterTokenizer;
 
 public class RussianLetterTokenizerFactory extends BaseTokenizerFactory {
-  @Deprecated
-  private char[] charset;
-  
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    charset = RussianCommon.getCharset(args.get("charset"));
-  }
+
 
   public RussianLetterTokenizer create(Reader in) {
-    return new RussianLetterTokenizer(in,charset);
+    return new RussianLetterTokenizer(in);
   }
 }
 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -23,17 +23,9 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.ru.RussianLowerCaseFilter;
 
 public class RussianLowerCaseFilterFactory extends BaseTokenFilterFactory {
-  @Deprecated
-  private char[] charset;
-  
-  @Override
-  public void init(Map<String, String> args) {
-    super.init(args);
-    charset = RussianCommon.getCharset(args.get("charset"));
-  }
 
   public RussianLowerCaseFilter create(TokenStream in) {
-    return new RussianLowerCaseFilter(in,charset);
+    return new RussianLowerCaseFilter(in);
   }
 }
 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -25,16 +25,10 @@ import org.apache.lucene.analysis.TokenS
 import org.apache.lucene.analysis.ru.RussianStemFilter;
 
 public class RussianStemFilterFactory extends BaseTokenFilterFactory {
-  @Deprecated
-  private char[] charset;
-  
-  public void init(Map<String, String> args) {
-    super.init(args);
-    charset = RussianCommon.getCharset(args.get("charset"));
-  }
+
 
   public RussianStemFilter create(TokenStream in) {
-    return new RussianStemFilter(in,charset);
+    return new RussianStemFilter(in);
   }
 }
 

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StandardTokenizerFactory.java Sun Mar 14 20:58:32 2010
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
 
 import java.io.Reader;
 
@@ -28,6 +29,6 @@ import java.io.Reader;
 
 public class StandardTokenizerFactory extends BaseTokenizerFactory {
   public StandardTokenizer create(Reader input) {
-    return new StandardTokenizer(input);
+    return new StandardTokenizer(Version.LUCENE_24, input);
   }
 }

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/StopFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -58,7 +58,7 @@ public class StopFilterFactory extends B
         throw new RuntimeException(e);
       }
     } else {
-      stopWords = (CharArraySet) StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase);
+      stopWords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
     }
   }
   //Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it.  See SOLR-1095
@@ -79,8 +79,7 @@ public class StopFilterFactory extends B
   }
 
   public StopFilter create(TokenStream input) {
-    StopFilter stopFilter = new StopFilter(input,stopWords,ignoreCase);
-    stopFilter.setEnablePositionIncrements(enablePositionIncrements);
+    StopFilter stopFilter = new StopFilter(enablePositionIncrements, input,stopWords,ignoreCase);
     return stopFilter;
   }
 }

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilter.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilter.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilter.java Sun Mar 14 20:58:32 2010
@@ -20,6 +20,12 @@ package org.apache.solr.analysis;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeSource;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -39,11 +45,16 @@ import java.util.LinkedList;
 public class SynonymFilter extends TokenFilter {
 
   private final SynonymMap map;  // Map<String, SynonymMap>
-  private Iterator<Token> replacement;  // iterator over generated tokens
+  private Iterator<AttributeSource> replacement;  // iterator over generated tokens
 
   public SynonymFilter(TokenStream in, SynonymMap map) {
     super(in);
     this.map = map;
+    // just ensuring these attributes exist...
+    addAttribute(TermAttribute.class);
+    addAttribute(PositionIncrementAttribute.class);
+    addAttribute(OffsetAttribute.class);
+    addAttribute(TypeAttribute.class);
   }
 
 
@@ -65,74 +76,100 @@ public class SynonymFilter extends Token
    *  - preserve original positionIncrement of first matched token
    */
   @Override
-  public Token next(Token target) throws IOException {
+  public boolean incrementToken() throws IOException {
     while (true) {
       // if there are any generated tokens, return them... don't try any
       // matches against them, as we specifically don't want recursion.
       if (replacement!=null && replacement.hasNext()) {
-        return replacement.next();
+        copy(this, replacement.next());
+        return true;
       }
 
       // common case fast-path of first token not matching anything
-      Token firstTok = nextTok(target);
-      if (firstTok == null) return null;
-      SynonymMap result = map.submap!=null ? map.submap.get(firstTok.termBuffer(), 0, firstTok.termLength()) : null;
-      if (result == null) return firstTok;
+      AttributeSource firstTok = nextTok();
+      if (firstTok == null) return false;
+      TermAttribute termAtt = (TermAttribute) firstTok.addAttribute(TermAttribute.class);
+      SynonymMap result = map.submap!=null ? map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()) : null;
+      if (result == null) {
+        copy(this, firstTok);
+        return true;
+      }
 
+      // fast-path failed, clone ourselves if needed
+      if (firstTok == this)
+        firstTok = cloneAttributes();
       // OK, we matched a token, so find the longest match.
 
-      matched = new LinkedList<Token>();
+      matched = new LinkedList<AttributeSource>();
 
       result = match(result);
 
       if (result==null) {
         // no match, simply return the first token read.
-        return firstTok;
+        copy(this, firstTok);
+        return true;
       }
 
       // reuse, or create new one each time?
-      ArrayList<Token> generated = new ArrayList<Token>(result.synonyms.length + matched.size() + 1);
+      ArrayList<AttributeSource> generated = new ArrayList<AttributeSource>(result.synonyms.length + matched.size() + 1);
 
       //
       // there was a match... let's generate the new tokens, merging
       // in the matched tokens (position increments need adjusting)
       //
-      Token lastTok = matched.isEmpty() ? firstTok : matched.getLast();
+      AttributeSource lastTok = matched.isEmpty() ? firstTok : matched.getLast();
       boolean includeOrig = result.includeOrig();
 
-      Token origTok = includeOrig ? firstTok : null;
-      int origPos = firstTok.getPositionIncrement();  // position of origTok in the original stream
+      AttributeSource origTok = includeOrig ? firstTok : null;
+      PositionIncrementAttribute firstPosIncAtt = (PositionIncrementAttribute) firstTok.addAttribute(PositionIncrementAttribute.class);
+      int origPos = firstPosIncAtt.getPositionIncrement();  // position of origTok in the original stream
       int repPos=0; // curr position in replacement token stream
       int pos=0;  // current position in merged token stream
 
       for (int i=0; i<result.synonyms.length; i++) {
         Token repTok = result.synonyms[i];
-        Token newTok = new Token(firstTok.startOffset(), lastTok.endOffset(), firstTok.type());
-        newTok.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
+        AttributeSource newTok = firstTok.cloneAttributes();
+        TermAttribute newTermAtt = (TermAttribute) newTok.addAttribute(TermAttribute.class);
+        OffsetAttribute newOffsetAtt = (OffsetAttribute) newTok.addAttribute(OffsetAttribute.class);
+        TypeAttribute newTypeAtt = (TypeAttribute) newTok.addAttribute(TypeAttribute.class);
+        PositionIncrementAttribute newPosIncAtt = (PositionIncrementAttribute) newTok.addAttribute(PositionIncrementAttribute.class);
+
+        OffsetAttribute lastOffsetAtt = (OffsetAttribute) lastTok.addAttribute(OffsetAttribute.class);
+
+        newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
+        newTermAtt.setTermBuffer(repTok.termBuffer(), 0, repTok.termLength());
         repPos += repTok.getPositionIncrement();
         if (i==0) repPos=origPos;  // make position of first token equal to original
 
         // if necessary, insert original tokens and adjust position increment
         while (origTok != null && origPos <= repPos) {
-          origTok.setPositionIncrement(origPos-pos);
+          PositionIncrementAttribute origPosInc = (PositionIncrementAttribute) origTok.addAttribute(PositionIncrementAttribute.class);
+          origPosInc.setPositionIncrement(origPos-pos);
           generated.add(origTok);
-          pos += origTok.getPositionIncrement();
+          pos += origPosInc.getPositionIncrement();
           origTok = matched.isEmpty() ? null : matched.removeFirst();
-          if (origTok != null) origPos += origTok.getPositionIncrement();
+          if (origTok != null) {
+            origPosInc = (PositionIncrementAttribute) origTok.addAttribute(PositionIncrementAttribute.class);
+            origPos += origPosInc.getPositionIncrement();
+          }
         }
 
-        newTok.setPositionIncrement(repPos - pos);
+        newPosIncAtt.setPositionIncrement(repPos - pos);
         generated.add(newTok);
-        pos += newTok.getPositionIncrement();
+        pos += newPosIncAtt.getPositionIncrement();
       }
 
       // finish up any leftover original tokens
       while (origTok!=null) {
-        origTok.setPositionIncrement(origPos-pos);
+        PositionIncrementAttribute origPosInc = (PositionIncrementAttribute) origTok.addAttribute(PositionIncrementAttribute.class);
+        origPosInc.setPositionIncrement(origPos-pos);
         generated.add(origTok);
-        pos += origTok.getPositionIncrement();
+        pos += origPosInc.getPositionIncrement();
         origTok = matched.isEmpty() ? null : matched.removeFirst();
-        if (origTok != null) origPos += origTok.getPositionIncrement();
+        if (origTok != null) {
+          origPosInc = (PositionIncrementAttribute) origTok.addAttribute(PositionIncrementAttribute.class);
+          origPos += origPosInc.getPositionIncrement();
+        }
       }
 
       // what if we replaced a longer sequence with a shorter one?
@@ -151,27 +188,22 @@ public class SynonymFilter extends Token
   // Defer creation of the buffer until the first time it is used to
   // optimize short fields with no matches.
   //
-  private LinkedList<Token> buffer;
-  private LinkedList<Token> matched;
-
-  private Token nextTok() throws IOException {
-    if (buffer!=null && !buffer.isEmpty()) {
-      return buffer.removeFirst();
-    } else {
-      return input.next();
-    }
-  }
+  private LinkedList<AttributeSource> buffer;
+  private LinkedList<AttributeSource> matched;
 
-  private Token nextTok(Token target) throws IOException {
+  private AttributeSource nextTok() throws IOException {
     if (buffer!=null && !buffer.isEmpty()) {
       return buffer.removeFirst();
     } else {
-      return input.next(target);
+      if (input.incrementToken()) {
+        return this;
+      } else
+        return null;
     }
   }
 
-  private void pushTok(Token t) {
-    if (buffer==null) buffer=new LinkedList<Token>();
+  private void pushTok(AttributeSource t) {
+    if (buffer==null) buffer=new LinkedList<AttributeSource>();
     buffer.addFirst(t);
   }
 
@@ -179,15 +211,19 @@
     SynonymMap result = null;
 
     if (map.submap != null) {
-      Token tok = nextTok();
+      AttributeSource tok = nextTok();
       if (tok != null) {
+        // clone ourselves.
+        if (tok == this)
+          tok = cloneAttributes();
         // check for positionIncrement!=1?  if>1, should not match, if==0, check multiple at this level?
-        SynonymMap subMap = map.submap.get(tok.termBuffer(), 0, tok.termLength());
+        TermAttribute termAtt = (TermAttribute) tok.getAttribute(TermAttribute.class);
+        SynonymMap subMap = map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength());
 
         if (subMap != null) {
           // recurse
           result = match(subMap);
         }
         if (result != null) {
           matched.addFirst(tok);
         } else {
@@ -205,6 +242,15 @@ public class SynonymFilter extends Token
     return result;
   }
 
+  private void copy(AttributeSource target, AttributeSource source) {
+    if (target == source)
+      return;
+    for (Iterator<AttributeImpl> sourceIt = source.getAttributeImplsIterator(), targetIt=target.getAttributeImplsIterator(); 
+         sourceIt.hasNext();) { 
+           sourceIt.next().copyTo(targetIt.next()); 
+    } 
+  }
+
   @Override
   public void reset() throws IOException {
     input.reset();

Modified: lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java?rev=922957&r1=922956&r2=922957&view=diff
==============================================================================
--- lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java (original)
+++ lucene/solr/branches/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java Sun Mar 14 20:58:32 2010
@@ -19,6 +19,7 @@ package org.apache.solr.analysis;
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.solr.common.ResourceLoader;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.util.plugin.ResourceLoaderAware;
@@ -135,8 +136,9 @@ public class SynonymFilterFactory extend
     TokenStream ts = loadTokenizer(tokFactory, reader);
     List<String> tokList = new ArrayList<String>();
     try {
-      for( Token token = ts.next(); token != null; token = ts.next() ){
-        String text = new String(token.termBuffer(), 0, token.termLength());
+      TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class);
+      while (ts.incrementToken()){
+        String text = new String(termAtt.termBuffer(), 0, termAtt.termLength());
         if( text.length() > 0 )
           tokList.add( text );
       }