Posted to commits@lucene.apache.org by si...@apache.org on 2012/09/21 19:22:27 UTC

svn commit: r1388574 [6/45] - in /lucene/dev/branches/LUCENE-2878: ./ dev-tools/ dev-tools/eclipse/ dev-tools/eclipse/dot.settings/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/ dev-tools/idea/lucene/analy...

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro Fri Sep 21 17:21:34 2012
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
+// Generated using ICU4J 49.1.0.0 on Wednesday, September 19, 2012 10:23:34 PM UTC
 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
 
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Fri Sep 21 17:21:34 2012
@@ -183,8 +183,7 @@ public final class StandardTokenizer ext
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }
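
For context, with this change the JFlex scanner is re-initialized in reset() rather than in setReader(), so the usual TokenStream consumer contract is unchanged. A minimal consumption sketch, assuming the Lucene 4.x workflow and the StandardTokenizer(Version, Reader) constructor; the sample text and Version.LUCENE_40 are illustrative only:

  // imports assumed: org.apache.lucene.analysis.standard.StandardTokenizer,
  //                  org.apache.lucene.analysis.tokenattributes.CharTermAttribute,
  //                  org.apache.lucene.util.Version, java.io.*
  Reader reader = new StringReader("the quick brown fox");
  StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_40, reader);
  CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
  tokenizer.reset();                        // scanner.yyreset(input) now happens here
  while (tokenizer.incrementToken()) {
    System.out.println(termAtt.toString());
  }
  tokenizer.end();
  tokenizer.close();

After pointing the tokenizer at a new Reader, reset() must be called again before the next incrementToken() loop, since that is now where the scanner is re-initialized.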

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Fri Sep 21 17:21:34 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/19/12 6:23 PM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -936,7 +936,7 @@ public final class StandardTokenizerImpl
       }     
     }
 
-	// numRead < 0
+    // numRead < 0
     return true;
   }
 
@@ -1157,36 +1157,36 @@ public final class StandardTokenizerImpl
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 2: 
-          { return WORD_TYPE;
+        case 1: 
+          { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
           }
         case 9: break;
-        case 5: 
-          { return SOUTH_EAST_ASIAN_TYPE;
+        case 2: 
+          { return WORD_TYPE;
           }
         case 10: break;
-        case 4: 
-          { return KATAKANA_TYPE;
+        case 3: 
+          { return NUMERIC_TYPE;
           }
         case 11: break;
-        case 6: 
-          { return IDEOGRAPHIC_TYPE;
+        case 4: 
+          { return KATAKANA_TYPE;
           }
         case 12: break;
-        case 8: 
-          { return HANGUL_TYPE;
+        case 5: 
+          { return SOUTH_EAST_ASIAN_TYPE;
           }
         case 13: break;
-        case 3: 
-          { return NUMERIC_TYPE;
+        case 6: 
+          { return IDEOGRAPHIC_TYPE;
           }
         case 14: break;
         case 7: 
           { return HIRAGANA_TYPE;
           }
         case 15: break;
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+        case 8: 
+          { return HANGUL_TYPE;
           }
         case 16: break;
         default: 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Fri Sep 21 17:21:34 2012
@@ -115,8 +115,8 @@ HiraganaEx = {Hiragana} ({Format} | {Ext
 
 %%
 
-// UAX#29 WB1. 	sot 	÷ 	
-//        WB2. 		÷ 	eot
+// UAX#29 WB1.   sot   ÷
+//        WB2.     ÷   eot
 //
 <<EOF>> { return StandardTokenizerInterface.YYEOF; }
 
@@ -166,7 +166,7 @@ HiraganaEx = {Hiragana} ({Format} | {Ext
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Fri Sep 21 17:21:34 2012
@@ -162,8 +162,7 @@ public final class UAX29URLEmailTokenize
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java Fri Sep 21 17:21:34 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/19/12 6:23 PM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -4126,7 +4126,7 @@ public final class UAX29URLEmailTokenize
       }     
     }
 
-	// numRead < 0
+    // numRead < 0
     return true;
   }
 
@@ -4347,50 +4347,50 @@ public final class UAX29URLEmailTokenize
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 11: 
-          // lookahead expression with fixed base length
-          zzMarkedPos = zzStartRead + 6;
-          { return WORD_TYPE;
+        case 1: 
+          { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
           }
         case 12: break;
         case 2: 
           { return WORD_TYPE;
           }
         case 13: break;
-        case 5: 
-          { return SOUTH_EAST_ASIAN_TYPE;
+        case 3: 
+          { return NUMERIC_TYPE;
           }
         case 14: break;
-        case 1: 
-          { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+        case 4: 
+          { return KATAKANA_TYPE;
           }
         case 15: break;
-        case 10: 
-          { return URL_TYPE;
+        case 5: 
+          { return SOUTH_EAST_ASIAN_TYPE;
           }
         case 16: break;
-        case 9: 
-          { return EMAIL_TYPE;
+        case 6: 
+          { return IDEOGRAPHIC_TYPE;
           }
         case 17: break;
-        case 4: 
-          { return KATAKANA_TYPE;
+        case 7: 
+          { return HIRAGANA_TYPE;
           }
         case 18: break;
-        case 6: 
-          { return IDEOGRAPHIC_TYPE;
-          }
-        case 19: break;
         case 8: 
           { return HANGUL_TYPE;
           }
+        case 19: break;
+        case 9: 
+          { return EMAIL_TYPE;
+          }
         case 20: break;
-        case 3: 
-          { return NUMERIC_TYPE;
+        case 10: 
+          { return URL_TYPE;
           }
         case 21: break;
-        case 7: 
-          { return HIRAGANA_TYPE;
+        case 11: 
+          // lookahead expression with fixed base length
+          zzMarkedPos = zzStartRead + 6;
+          { return WORD_TYPE;
           }
         case 22: break;
         default: 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Fri Sep 21 17:21:34 2012
@@ -200,8 +200,8 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
 
 %%
 
-// UAX#29 WB1. 	sot 	÷ 	
-//        WB2. 		÷ 	eot
+// UAX#29 WB1.   sot   ÷
+//        WB2.     ÷   eot
 //
 <<EOF>> { return StandardTokenizerInterface.YYEOF; }
 
@@ -258,7 +258,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java Fri Sep 21 17:21:34 2012
@@ -227,8 +227,8 @@ public class SynonymMap {
     /**
      * Add a phrase->phrase synonym mapping.
      * Phrases are character sequences where words are
-     * separated with character zero (\u0000).  Empty words
-     * (two \u0000s in a row) are not allowed in the input nor
+     * separated with character zero (U+0000).  Empty words
+     * (two U+0000s in a row) are not allowed in the input nor
      * the output!
      * 
      * @param input input phrase
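
A minimal sketch of adding one phrase->phrase mapping through SynonymMap.Builder, assuming its join()/add()/build() signatures on this branch; the example phrases and the dedup/includeOrig flags are illustrative only:

  // imports assumed: org.apache.lucene.analysis.synonym.SynonymMap, org.apache.lucene.util.CharsRef
  SynonymMap.Builder builder = new SynonymMap.Builder(true);  // true = de-duplicate entries
  // join() produces the U+0000-separated form described above, e.g. "wi\u0000fi"
  CharsRef input  = SynonymMap.Builder.join(new String[] {"wi", "fi"}, new CharsRef());
  CharsRef output = SynonymMap.Builder.join(new String[] {"wifi"}, new CharsRef());
  builder.add(input, output, true);                           // true = also keep the original phrase
  SynonymMap map = builder.build();                           // may throw IOException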

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Fri Sep 21 17:21:34 2012
@@ -37,6 +37,15 @@ import java.util.regex.PatternSyntaxExce
 /**
  * Abstract parent class for analysis factories {@link TokenizerFactory},
  * {@link TokenFilterFactory} and {@link CharFilterFactory}.
+ * <p>
+ * The typical lifecycle for a factory consumer is:
+ * <ol>
+ *   <li>Create the factory via its no-arg constructor
+ *   <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
+ *   <li>Call {@link #init(Map)}, passing arguments as key-value mappings.
+ *   <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
+ *   <li>Call create() to obtain instances.
+ * </ol>
  */
 public abstract class AbstractAnalysisFactory {
 
@@ -46,6 +55,9 @@ public abstract class AbstractAnalysisFa
   /** the luceneVersion arg */
   protected Version luceneMatchVersion = null;
 
+  /**
+   * Initialize this factory via a set of key-value pairs.
+   */
   public void init(Map<String,String> args) {
     this.args = args;
   }
@@ -104,6 +116,9 @@ public abstract class AbstractAnalysisFa
     return Boolean.parseBoolean(s);
   }
 
+  /**
+   * Compiles a pattern for the value of the specified argument key <code>name</code> 
+   */
   protected Pattern getPattern(String name) {
     try {
       String pat = args.get(name);
@@ -118,6 +133,10 @@ public abstract class AbstractAnalysisFa
     }
   }
 
+  /**
+   * Returns a {@link CharArraySet} from wordFiles, which
+   * can be a comma-separated list of filenames
+   */
   protected CharArraySet getWordSet(ResourceLoader loader,
       String wordFiles, boolean ignoreCase) throws IOException {
     assureMatchVersion();
@@ -137,6 +156,9 @@ public abstract class AbstractAnalysisFa
     return words;
   }
   
+  /**
+   * Returns the resource's lines (with content treated as UTF-8)
+   */
   protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
     return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
   }
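
The lifecycle documented above can be walked end to end. A sketch using StopFilterFactory as a stand-in consumer target, under the assumption that its no-arg constructor, the "words"/"ignoreCase" argument keys, and ClasspathResourceLoader are available on this branch (none of them are part of this commit):

  // imports assumed: org.apache.lucene.analysis.*, org.apache.lucene.analysis.core.*,
  //                  org.apache.lucene.analysis.util.*, org.apache.lucene.util.Version,
  //                  java.io.*, java.util.*
  StopFilterFactory factory = new StopFilterFactory();          // 1. no-arg constructor
  factory.setLuceneMatchVersion(Version.LUCENE_40);             // 2. version emulation
  Map<String,String> args = new HashMap<String,String>();
  args.put("words", "stopwords.txt");                           //    argument keys are assumptions
  args.put("ignoreCase", "true");
  factory.init(args);                                           // 3. init(Map)
  factory.inform(new ClasspathResourceLoader(StopFilterFactory.class)); // 4. ResourceLoaderAware
  Tokenizer tok = new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("a stopword here"));
  TokenStream stream = factory.create(tok);                     // 5. create() per field/stream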

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Fri Sep 21 17:21:34 2012
@@ -78,7 +78,8 @@ public abstract class CharTokenizer exte
     charUtils = CharacterUtils.getInstance(matchVersion);
   }
   
-  private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+  // note: bufferIndex starts at -1 as a best effort so that consumers that don't call reset() hit an AIOOBE
+  private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
   private static final int MAX_WORD_LEN = 255;
   private static final int IO_BUFFER_SIZE = 4096;
   
@@ -162,8 +163,7 @@ public abstract class CharTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     bufferIndex = 0;
     offset = 0;
     dataLen = 0;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java Fri Sep 21 17:21:34 2012
@@ -34,7 +34,7 @@ import org.apache.lucene.analysis.fr.Fre
  * &lt;/fieldType&gt;</pre>
  *
  */
-public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+public class ElisionFilterFactory extends TokenFilterFactory implements ResourceLoaderAware, MultiTermAwareComponent {
 
   private CharArraySet articles;
 
@@ -53,5 +53,10 @@ public class ElisionFilterFactory extend
   public ElisionFilter create(TokenStream input) {
     return new ElisionFilter(input, articles);
   }
+
+  @Override
+  public AbstractAnalysisFactory getMultiTermComponent() {
+    return this;
+  }
 }
 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Fri Sep 21 17:21:34 2012
@@ -27,5 +27,9 @@ import java.io.IOException;
  */
 public interface ResourceLoaderAware {
 
+  /**
+   * Initializes this component with the provided ResourceLoader
+   * (used for loading classes, files, etc).
+   */
   void inform(ResourceLoader loader) throws IOException;
 }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java Fri Sep 21 17:21:34 2012
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis.util;
 
 /** Some commonly-used stemming functions */
 public class StemmerUtil {
+  /** no instance */
+  private StemmerUtil() {}
+
   /**
    * Returns true if the character array starts with the suffix.
    * 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Fri Sep 21 17:21:34 2012
@@ -36,7 +36,10 @@ import org.apache.lucene.util.Version;
  */
 public class WordlistLoader {
   
-  private static final int INITITAL_CAPACITY = 16;
+  private static final int INITIAL_CAPACITY = 16;
+  
+  /** no instance */
+  private WordlistLoader() {}
   
   /**
    * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
@@ -74,7 +77,7 @@ public class WordlistLoader {
    * @return A {@link CharArraySet} with the reader's words
    */
   public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException {
-    return getWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+    return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
   }
 
   /**
@@ -89,7 +92,7 @@ public class WordlistLoader {
    * @return A CharArraySet with the reader's words
    */
   public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException {
-    return getWordSet(reader, comment, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+    return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
   }
 
   /**
@@ -171,7 +174,7 @@ public class WordlistLoader {
    * @return A {@link CharArraySet} with the reader's words
    */
   public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException {
-    return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+    return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
   }
 
 

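As a usage note for the overloads touched above, a minimal sketch of loading a comment-stripped word set; the matchVersion variable is an assumption, in the spirit of the collation examples later in this commit:

  // imports assumed: org.apache.lucene.analysis.util.*, org.apache.lucene.util.Version, java.io.*
  Reader wordsReader = new StringReader("foo\nbar\n# comment lines start with '#'\nbaz\n");
  CharArraySet words = WordlistLoader.getWordSet(wordsReader, "#", matchVersion);
  boolean hasFoo = words.contains("foo");   // true; the sets built here are case-sensitive
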
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Fri Sep 21 17:21:34 2012
@@ -318,19 +318,13 @@ public final class WikipediaTokenizer ex
   */
   @Override
   public void reset() throws IOException {
-    super.reset();
+    scanner.yyreset(input);
     tokens = null;
     scanner.reset();
     first = true;
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(input);
-  }
-
-  @Override
   public void end() {
     // set final offset
     final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Fri Sep 21 17:21:34 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/19/12 6:23 PM */
 
 package org.apache.lucene.analysis.wikipedia;
 
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 8/6/12 11:57 AM from the specification file
- * <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 9/19/12 6:23 PM from the specification file
+ * <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
  */
 class WikipediaTokenizerImpl {
 
@@ -37,16 +37,16 @@ class WikipediaTokenizerImpl {
   private static final int ZZ_BUFFERSIZE = 4096;
 
   /** lexical states */
-  public static final int THREE_SINGLE_QUOTES_STATE = 10;
+  public static final int YYINITIAL = 0;
+  public static final int CATEGORY_STATE = 2;
+  public static final int INTERNAL_LINK_STATE = 4;
   public static final int EXTERNAL_LINK_STATE = 6;
+  public static final int TWO_SINGLE_QUOTES_STATE = 8;
+  public static final int THREE_SINGLE_QUOTES_STATE = 10;
+  public static final int FIVE_SINGLE_QUOTES_STATE = 12;
   public static final int DOUBLE_EQUALS_STATE = 14;
-  public static final int INTERNAL_LINK_STATE = 4;
   public static final int DOUBLE_BRACE_STATE = 16;
-  public static final int CATEGORY_STATE = 2;
-  public static final int YYINITIAL = 0;
   public static final int STRING = 18;
-  public static final int FIVE_SINGLE_QUOTES_STATE = 12;
-  public static final int TWO_SINGLE_QUOTES_STATE = 8;
 
   /**
    * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
@@ -589,7 +589,7 @@ final void reset() {
       }     
     }
 
-	// numRead < 0
+    // numRead < 0
     return true;
   }
 
@@ -810,188 +810,188 @@ final void reset() {
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 44: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
+        case 1: 
+          { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
           }
         case 47: break;
-        case 37: 
-          { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 2: 
+          { positionInc = 1; return ALPHANUM;
           }
         case 48: break;
-        case 16: 
-          { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
+        case 3: 
+          { positionInc = 1; return CJ;
           }
         case 49: break;
-        case 20: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 4: 
+          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
           }
         case 50: break;
-        case 40: 
-          { positionInc = 1; return ACRONYM;
-          }
-        case 51: break;
         case 5: 
           { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
           }
+        case 51: break;
+        case 6: 
+          { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+          }
         case 52: break;
-        case 36: 
-          { positionInc = 1; return COMPANY;
+        case 7: 
+          { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
           }
         case 53: break;
-        case 10: 
-          { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+        case 8: 
+          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
           }
         case 54: break;
-        case 15: 
-          { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
+        case 9: 
+          { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
           }
         case 55: break;
-        case 22: 
-          { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
+        case 10: 
+          { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
           }
         case 56: break;
-        case 35: 
-          { positionInc = 1; return NUM;
+        case 11: 
+          { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 57: break;
-        case 33: 
-          { positionInc = 1; return APOSTROPHE;
+        case 12: 
+          { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
           }
         case 58: break;
-        case 21: 
-          { yybegin(STRING); return currentTokType;/*pipe*/
+        case 13: 
+          { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 59: break;
-        case 18: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
+        case 14: 
+          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
           }
         case 60: break;
-        case 2: 
-          { positionInc = 1; return ALPHANUM;
+        case 15: 
+          { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;
           }
         case 61: break;
-        case 1: 
-          { numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;
+        case 16: 
+          { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
           }
         case 62: break;
         case 17: 
           { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
           }
         case 63: break;
-        case 39: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
+        case 18: 
+          { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */
           }
         case 64: break;
-        case 29: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 19: 
+          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
           }
         case 65: break;
-        case 46: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 20: 
+          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 66: break;
-        case 27: 
-          { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+        case 21: 
+          { yybegin(STRING); return currentTokType;/*pipe*/
           }
         case 67: break;
-        case 4: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+        case 22: 
+          { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;
           }
         case 68: break;
-        case 38: 
-          { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
+        case 23: 
+          { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
           }
         case 69: break;
-        case 13: 
-          { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 24: 
+          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
           }
         case 70: break;
-        case 3: 
-          { positionInc = 1; return CJ;
+        case 25: 
+          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
           }
         case 71: break;
-        case 45: 
-          { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 26: 
+          { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
           }
         case 72: break;
-        case 6: 
-          { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+        case 27: 
+          { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
           }
         case 73: break;
-        case 11: 
-          { currentTokType = BOLD;  yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 28: 
+          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 74: break;
-        case 25: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;
+        case 29: 
+          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0;  yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 75: break;
-        case 8: 
-          { /* Break so we don't hit fall-through warning: */ break;/* ignore */
+        case 30: 
+          { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
           }
         case 76: break;
-        case 19: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+        case 31: 
+          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
           }
         case 77: break;
-        case 43: 
-          { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+        case 32: 
+          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 78: break;
-        case 42: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
+        case 33: 
+          { positionInc = 1; return APOSTROPHE;
           }
         case 79: break;
-        case 30: 
-          { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;
+        case 34: 
+          { positionInc = 1; return HOST;
           }
         case 80: break;
-        case 14: 
-          { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+        case 35: 
+          { positionInc = 1; return NUM;
           }
         case 81: break;
-        case 9: 
-          { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+        case 36: 
+          { positionInc = 1; return COMPANY;
           }
         case 82: break;
-        case 7: 
-          { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+        case 37: 
+          { currentTokType = BOLD_ITALICS;  yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 83: break;
-        case 41: 
-          { positionInc = 1; return EMAIL;
+        case 38: 
+          { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/
           }
         case 84: break;
-        case 28: 
-          { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 39: 
+          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/
           }
         case 85: break;
-        case 23: 
-          { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;
+        case 40: 
+          { positionInc = 1; return ACRONYM;
           }
         case 86: break;
-        case 34: 
-          { positionInc = 1; return HOST;
+        case 41: 
+          { positionInc = 1; return EMAIL;
           }
         case 87: break;
-        case 32: 
-          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;
+        case 42: 
+          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/
           }
         case 88: break;
-        case 12: 
-          { currentTokType = ITALICS; numWikiTokensSeen++;  yybegin(STRING); return currentTokType;/*italics*/
+        case 43: 
+          { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
           }
         case 89: break;
-        case 24: 
-          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;
+        case 44: 
+          { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;
           }
         case 90: break;
-        case 31: 
-          { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/
+        case 45: 
+          { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 91: break;
-        case 26: 
-          { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;
+        case 46: 
+          { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;
           }
         case 92: break;
         default: 

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex Fri Sep 21 17:21:34 2012
@@ -136,7 +136,7 @@ NUM        = ({ALPHANUM} {P} {HAS_DIGIT}
 TAGS = "<"\/?{ALPHANUM}({WHITESPACE}*{ALPHANUM}=\"{ALPHANUM}\")*">"
 
 // punctuation
-P	         = ("_"|"-"|"/"|"."|",")
+P           = ("_"|"-"|"/"|"."|",")
 
 // at least one digit
 HAS_DIGIT  =

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/collation/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/collation/package.html?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/collation/package.html (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/collation/package.html Fri Sep 21 17:21:34 2012
@@ -53,17 +53,17 @@
 <pre class="prettyprint">
   // "fa" Locale is not supported by Sun JDK 1.4 or 1.5
   Collator collator = Collator.getInstance(new Locale("ar"));
-  CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
+  CollationKeyAnalyzer analyzer = new CollationKeyAnalyzer(version, collator);
   RAMDirectory ramDir = new RAMDirectory();
-  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
+  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(version, analyzer));
   Document doc = new Document();
-  doc.add(new Field("content", "\u0633\u0627\u0628", 
-                    Field.Store.YES, Field.Index.ANALYZED));
+  doc.add(new TextField("content", "\u0633\u0627\u0628", Field.Store.YES));
   writer.addDocument(doc);
   writer.close();
-  IndexSearcher is = new IndexSearcher(ramDir, true);
+  IndexReader ir = DirectoryReader.open(ramDir);
+  IndexSearcher is = new IndexSearcher(ir);
 
-  QueryParser aqp = new QueryParser(Version.LUCENE_40, "content", analyzer);
+  QueryParser aqp = new QueryParser(version, "content", analyzer);
   aqp.setAnalyzeRangeTerms(true);
     
   // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
@@ -79,20 +79,21 @@
 <h3>Danish Sorting</h3>
 <pre class="prettyprint">
   Analyzer analyzer 
-    = new CollationKeyAnalyzer(Version.LUCENE_40, Collator.getInstance(new Locale("da", "dk")));
+    = new CollationKeyAnalyzer(version, Collator.getInstance(new Locale("da", "dk")));
   RAMDirectory indexStore = new RAMDirectory();
-  IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(Version.LUCENE_40, analyzer));
+  IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(version, analyzer));
   String[] tracer = new String[] { "A", "B", "C", "D", "E" };
   String[] data = new String[] { "HAT", "HUT", "H\u00C5T", "H\u00D8T", "HOT" };
   String[] sortedTracerOrder = new String[] { "A", "E", "B", "D", "C" };
   for (int i = 0 ; i < data.length ; ++i) {
     Document doc = new Document();
-    doc.add(new Field("tracer", tracer[i], Field.Store.YES, Field.Index.NO));
-    doc.add(new Field("contents", data[i], Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new StoredField("tracer", tracer[i]));
+    doc.add(new TextField("contents", data[i], Field.Store.NO));
     writer.addDocument(doc);
   }
   writer.close();
-  IndexSearcher searcher = new IndexSearcher(indexStore, true);
+  IndexReader ir = DirectoryReader.open(indexStore);
+  IndexSearcher searcher = new IndexSearcher(ir);
   Sort sort = new Sort();
   sort.setSort(new SortField("contents", SortField.STRING));
   Query query = new MatchAllDocsQuery();
@@ -107,15 +108,16 @@
 <pre class="prettyprint">
   Collator collator = Collator.getInstance(new Locale("tr", "TR"));
   collator.setStrength(Collator.PRIMARY);
-  Analyzer analyzer = new CollationKeyAnalyzer(Version.LUCENE_40, collator);
+  Analyzer analyzer = new CollationKeyAnalyzer(version, collator);
   RAMDirectory ramDir = new RAMDirectory();
-  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(Version.LUCENE_40, analyzer));
+  IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig(version, analyzer));
   Document doc = new Document();
-  doc.add(new Field("contents", "DIGY", Field.Store.NO, Field.Index.ANALYZED));
+  doc.add(new TextField("contents", "DIGY", Field.Store.NO));
   writer.addDocument(doc);
   writer.close();
-  IndexSearcher is = new IndexSearcher(ramDir, true);
-  QueryParser parser = new QueryParser(Version.LUCENE_40, "contents", analyzer);
+  IndexReader ir = DirectoryReader.open(ramDir);
+  IndexSearcher is = new IndexSearcher(ir);
+  QueryParser parser = new QueryParser(version, "contents", analyzer);
   Query query = parser.parse("d\u0131gy");   // U+0131: dotless i
   ScoreDoc[] result = is.search(query, null, 1000).scoreDocs;
   assertEquals("The index Term should be included.", 1, result.length);

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/Among.java Fri Sep 21 17:21:34 2012
@@ -43,25 +43,26 @@ import java.lang.reflect.Method;
  * reflection calls (Lovins, etc) use EMPTY_ARGS/EMPTY_PARAMS
  */
 public class Among {
-    private static final Class<?>[] EMPTY_PARAMS = new Class[0];
-    public Among (String s, int substring_i, int result,
-		  String methodname, SnowballProgram methodobject) {
-        this.s_size = s.length();
-        this.s = s.toCharArray();
-        this.substring_i = substring_i;
-	this.result = result;
-	this.methodobject = methodobject;
-	if (methodname.length() == 0) {
-	    this.method = null;
-	} else {
-	    try {
-		this.method = methodobject.getClass().
-		getDeclaredMethod(methodname, EMPTY_PARAMS);
-	    } catch (NoSuchMethodException e) {
-		throw new RuntimeException(e);
-	    }
-	}
+  private static final Class<?>[] EMPTY_PARAMS = new Class[0];
+
+  public Among(String s, int substring_i, int result,
+               String methodname, SnowballProgram methodobject) {
+    this.s_size = s.length();
+    this.s = s.toCharArray();
+    this.substring_i = substring_i;
+    this.result = result;
+    this.methodobject = methodobject;
+    if (methodname.length() == 0) {
+      this.method = null;
+    } else {
+      try {
+        this.method = methodobject.getClass().
+            getDeclaredMethod(methodname, EMPTY_PARAMS);
+      } catch (NoSuchMethodException e) {
+        throw new RuntimeException(e);
+      }
     }
+  }
 
     public final int s_size; /* search string */
     public final char[] s; /* search string */

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/SnowballProgram.java Fri Sep 21 17:21:34 2012
@@ -51,8 +51,8 @@ public abstract class SnowballProgram {
 
     protected SnowballProgram()
     {
-	current = new char[8];
-	setCurrent("");
+      current = new char[8];
+      setCurrent("");
     }
 
     public abstract boolean stem();
@@ -62,12 +62,12 @@ public abstract class SnowballProgram {
      */
     public void setCurrent(String value)
     {
-	current = value.toCharArray();
-	cursor = 0;
-	limit = value.length();
-	limit_backward = 0;
-	bra = cursor;
-	ket = limit;
+      current = value.toCharArray();
+      cursor = 0;
+      limit = value.length();
+      limit_backward = 0;
+      bra = cursor;
+      ket = limit;
     }
 
     /**
@@ -130,354 +130,350 @@ public abstract class SnowballProgram {
 
     protected void copy_from(SnowballProgram other)
     {
-	current          = other.current;
-	cursor           = other.cursor;
-	limit            = other.limit;
-	limit_backward   = other.limit_backward;
-	bra              = other.bra;
-	ket              = other.ket;
+      current          = other.current;
+      cursor           = other.cursor;
+      limit            = other.limit;
+      limit_backward   = other.limit_backward;
+      bra              = other.bra;
+      ket              = other.ket;
     }
 
     protected boolean in_grouping(char [] s, int min, int max)
     {
-	if (cursor >= limit) return false;
-	char ch = current[cursor];
-	if (ch > max || ch < min) return false;
-	ch -= min;
-	if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
-	cursor++;
-	return true;
+      if (cursor >= limit) return false;
+      char ch = current[cursor];
+      if (ch > max || ch < min) return false;
+      ch -= min;
+      if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
+      cursor++;
+      return true;
     }
 
     protected boolean in_grouping_b(char [] s, int min, int max)
     {
-	if (cursor <= limit_backward) return false;
-	char ch = current[cursor - 1];
-	if (ch > max || ch < min) return false;
-	ch -= min;
-	if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
-	cursor--;
-	return true;
+      if (cursor <= limit_backward) return false;
+      char ch = current[cursor - 1];
+      if (ch > max || ch < min) return false;
+      ch -= min;
+      if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
+      cursor--;
+      return true;
     }
 
     protected boolean out_grouping(char [] s, int min, int max)
     {
-	if (cursor >= limit) return false;
-	char ch = current[cursor];
-	if (ch > max || ch < min) {
-	    cursor++;
-	    return true;
-	}
-	ch -= min;
-	if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
-	    cursor ++;
-	    return true;
-	}
-	return false;
+      if (cursor >= limit) return false;
+      char ch = current[cursor];
+      if (ch > max || ch < min) {
+          cursor++;
+          return true;
+      }
+      ch -= min;
+      if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
+          cursor ++;
+          return true;
+      }
+      return false;
     }
 
     protected boolean out_grouping_b(char [] s, int min, int max)
     {
-	if (cursor <= limit_backward) return false;
-	char ch = current[cursor - 1];
-	if (ch > max || ch < min) {
-	    cursor--;
-	    return true;
-	}
-	ch -= min;
-	if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
-	    cursor--;
-	    return true;
-	}
-	return false;
+      if (cursor <= limit_backward) return false;
+      char ch = current[cursor - 1];
+      if (ch > max || ch < min) {
+          cursor--;
+          return true;
+      }
+      ch -= min;
+      if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
+          cursor--;
+          return true;
+      }
+      return false;
     }
 
     protected boolean in_range(int min, int max)
     {
-	if (cursor >= limit) return false;
-	char ch = current[cursor];
-	if (ch > max || ch < min) return false;
-	cursor++;
-	return true;
+      if (cursor >= limit) return false;
+      char ch = current[cursor];
+      if (ch > max || ch < min) return false;
+      cursor++;
+      return true;
     }
 
     protected boolean in_range_b(int min, int max)
     {
-	if (cursor <= limit_backward) return false;
-	char ch = current[cursor - 1];
-	if (ch > max || ch < min) return false;
-	cursor--;
-	return true;
+      if (cursor <= limit_backward) return false;
+      char ch = current[cursor - 1];
+      if (ch > max || ch < min) return false;
+      cursor--;
+      return true;
     }
 
     protected boolean out_range(int min, int max)
     {
-	if (cursor >= limit) return false;
-	char ch = current[cursor];
-	if (!(ch > max || ch < min)) return false;
-	cursor++;
-	return true;
+      if (cursor >= limit) return false;
+      char ch = current[cursor];
+      if (!(ch > max || ch < min)) return false;
+      cursor++;
+      return true;
     }
 
     protected boolean out_range_b(int min, int max)
     {
-	if (cursor <= limit_backward) return false;
-	char ch = current[cursor - 1];
-	if(!(ch > max || ch < min)) return false;
-	cursor--;
-	return true;
+      if (cursor <= limit_backward) return false;
+      char ch = current[cursor - 1];
+      if(!(ch > max || ch < min)) return false;
+      cursor--;
+      return true;
     }
 
     protected boolean eq_s(int s_size, CharSequence s)
     {
-	if (limit - cursor < s_size) return false;
-	int i;
-	for (i = 0; i != s_size; i++) {
-	    if (current[cursor + i] != s.charAt(i)) return false;
-	}
-	cursor += s_size;
-	return true;
+      if (limit - cursor < s_size) return false;
+      int i;
+      for (i = 0; i != s_size; i++) {
+          if (current[cursor + i] != s.charAt(i)) return false;
+      }
+      cursor += s_size;
+      return true;
     }
 
     protected boolean eq_s_b(int s_size, CharSequence s)
     {
-	if (cursor - limit_backward < s_size) return false;
-	int i;
-	for (i = 0; i != s_size; i++) {
-	    if (current[cursor - s_size + i] != s.charAt(i)) return false;
-	}
-	cursor -= s_size;
-	return true;
+      if (cursor - limit_backward < s_size) return false;
+      int i;
+      for (i = 0; i != s_size; i++) {
+          if (current[cursor - s_size + i] != s.charAt(i)) return false;
+      }
+      cursor -= s_size;
+      return true;
     }
 
     protected boolean eq_v(CharSequence s)
     {
-	return eq_s(s.length(), s);
+      return eq_s(s.length(), s);
     }
 
     protected boolean eq_v_b(CharSequence s)
-    {   return eq_s_b(s.length(), s);
+    {
+      return eq_s_b(s.length(), s);
     }
 
     protected int find_among(Among v[], int v_size)
     {
-	int i = 0;
-	int j = v_size;
+      int i = 0;
+      int j = v_size;
+
+      int c = cursor;
+      int l = limit;
 
-	int c = cursor;
-	int l = limit;
+      int common_i = 0;
+      int common_j = 0;
 
-	int common_i = 0;
-	int common_j = 0;
+      boolean first_key_inspected = false;
+
+      while (true) {
+        int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j; // smaller
+        Among w = v[k];
+        int i2;
+        for (i2 = common; i2 < w.s_size; i2++) {
+          if (c + common == l) {
+            diff = -1;
+            break;
+          }
+          diff = current[c + common] - w.s[i2];
+          if (diff != 0) break;
+          common++;
+        }
+        if (diff < 0) {
+          j = k;
+          common_j = common;
+        } else {
+          i = k;
+          common_i = common;
+        }
+        if (j - i <= 1) {
+          if (i > 0) break; // v->s has been inspected
+          if (j == i) break; // only one item in v
+
+          // - but now we need to go round once more to get
+          // v->s inspected. This looks messy, but is actually
+          // the optimal approach.
 
-	boolean first_key_inspected = false;
-
-	while(true) {
-	    int k = i + ((j - i) >> 1);
-	    int diff = 0;
-	    int common = common_i < common_j ? common_i : common_j; // smaller
-	    Among w = v[k];
-	    int i2;
-	    for (i2 = common; i2 < w.s_size; i2++) {
-		if (c + common == l) {
-		    diff = -1;
-		    break;
-		}
-		diff = current[c + common] - w.s[i2];
-		if (diff != 0) break;
-		common++;
-	    }
-	    if (diff < 0) {
-		j = k;
-		common_j = common;
-	    } else {
-		i = k;
-		common_i = common;
-	    }
-	    if (j - i <= 1) {
-		if (i > 0) break; // v->s has been inspected
-		if (j == i) break; // only one item in v
-
-		// - but now we need to go round once more to get
-		// v->s inspected. This looks messy, but is actually
-		// the optimal approach.
-
-		if (first_key_inspected) break;
-		first_key_inspected = true;
-	    }
-	}
-	while(true) {
-	    Among w = v[i];
-	    if (common_i >= w.s_size) {
-		cursor = c + w.s_size;
-		if (w.method == null) return w.result;
-		boolean res;
-		try {
-		    Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
-		    res = resobj.toString().equals("true");
-		} catch (InvocationTargetException e) {
-		    res = false;
-		    // FIXME - debug message
-		} catch (IllegalAccessException e) {
-		    res = false;
-		    // FIXME - debug message
-		}
-		cursor = c + w.s_size;
-		if (res) return w.result;
-	    }
-	    i = w.substring_i;
-	    if (i < 0) return 0;
-	}
+          if (first_key_inspected) break;
+          first_key_inspected = true;
+        }
+      }
+      while (true) {
+        Among w = v[i];
+        if (common_i >= w.s_size) {
+          cursor = c + w.s_size;
+          if (w.method == null) return w.result;
+          boolean res;
+          try {
+            Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
+            res = resobj.toString().equals("true");
+          } catch (InvocationTargetException e) {
+            res = false;
+            // FIXME - debug message
+          } catch (IllegalAccessException e) {
+            res = false;
+            // FIXME - debug message
+          }
+          cursor = c + w.s_size;
+          if (res) return w.result;
+        }
+        i = w.substring_i;
+        if (i < 0) return 0;
+      }
     }
 
-    // find_among_b is for backwards processing. Same comments apply
+    // find_among_b is for backwards processing. Same comments apply
     protected int find_among_b(Among v[], int v_size)
     {
-	int i = 0;
-	int j = v_size;
+      int i = 0;
+      int j = v_size;
 
-	int c = cursor;
-	int lb = limit_backward;
+      int c = cursor;
+      int lb = limit_backward;
 
-	int common_i = 0;
-	int common_j = 0;
+      int common_i = 0;
+      int common_j = 0;
 
-	boolean first_key_inspected = false;
-
-	while(true) {
-	    int k = i + ((j - i) >> 1);
-	    int diff = 0;
-	    int common = common_i < common_j ? common_i : common_j;
-	    Among w = v[k];
-	    int i2;
-	    for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
-		if (c - common == lb) {
-		    diff = -1;
-		    break;
-		}
-		diff = current[c - 1 - common] - w.s[i2];
-		if (diff != 0) break;
-		common++;
-	    }
-	    if (diff < 0) {
-		j = k;
-		common_j = common;
-	    } else {
-		i = k;
-		common_i = common;
-	    }
-	    if (j - i <= 1) {
-		if (i > 0) break;
-		if (j == i) break;
-		if (first_key_inspected) break;
-		first_key_inspected = true;
-	    }
-	}
-	while(true) {
-	    Among w = v[i];
-	    if (common_i >= w.s_size) {
-		cursor = c - w.s_size;
-		if (w.method == null) return w.result;
-
-		boolean res;
-		try {
-		    Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
-		    res = resobj.toString().equals("true");
-		} catch (InvocationTargetException e) {
-		    res = false;
-		    // FIXME - debug message
-		} catch (IllegalAccessException e) {
-		    res = false;
-		    // FIXME - debug message
-		}
-		cursor = c - w.s_size;
-		if (res) return w.result;
-	    }
-	    i = w.substring_i;
-	    if (i < 0) return 0;
-	}
+      boolean first_key_inspected = false;
+
+      while (true) {
+        int k = i + ((j - i) >> 1);
+        int diff = 0;
+        int common = common_i < common_j ? common_i : common_j;
+        Among w = v[k];
+        int i2;
+        for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
+          if (c - common == lb) {
+            diff = -1;
+            break;
+          }
+          diff = current[c - 1 - common] - w.s[i2];
+          if (diff != 0) break;
+          common++;
+        }
+        if (diff < 0) {
+          j = k;
+          common_j = common;
+        } else {
+          i = k;
+          common_i = common;
+        }
+        if (j - i <= 1) {
+          if (i > 0) break;
+          if (j == i) break;
+          if (first_key_inspected) break;
+          first_key_inspected = true;
+        }
+      }
+      while (true) {
+        Among w = v[i];
+        if (common_i >= w.s_size) {
+          cursor = c - w.s_size;
+          if (w.method == null) return w.result;
+
+          boolean res;
+          try {
+            Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS);
+            res = resobj.toString().equals("true");
+          } catch (InvocationTargetException e) {
+            res = false;
+            // FIXME - debug message
+          } catch (IllegalAccessException e) {
+            res = false;
+            // FIXME - debug message
+          }
+          cursor = c - w.s_size;
+          if (res) return w.result;
+        }
+        i = w.substring_i;
+        if (i < 0) return 0;
+      }
     }
 
-    /* to replace chars between c_bra and c_ket in current by the
+  /* to replace chars between c_bra and c_ket in current by the
      * chars in s.
      */
-    protected int replace_s(int c_bra, int c_ket, CharSequence s)
-    {
-	final int adjustment = s.length() - (c_ket - c_bra);
-	final int newLength = limit + adjustment;
-	//resize if necessary
-	if (newLength > current.length) {
-	  char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
-	  System.arraycopy(current, 0, newBuffer, 0, limit);
-	  current = newBuffer;
-	}
-	// if the substring being replaced is longer or shorter than the
-	// replacement, need to shift things around
-	if (adjustment != 0 && c_ket < limit) {
-	  System.arraycopy(current, c_ket, current, c_bra + s.length(), 
-	      limit - c_ket);
-	}
-	// insert the replacement text
-	// Note, faster is s.getChars(0, s.length(), current, c_bra);
-	// but would have to duplicate this method for both String and StringBuilder
-	for (int i = 0; i < s.length(); i++)
-	  current[c_bra + i] = s.charAt(i);
-	
-	limit += adjustment;
-	if (cursor >= c_ket) cursor += adjustment;
-	else if (cursor > c_bra) cursor = c_bra;
-	return adjustment;
-    }
-
-    protected void slice_check()
-    {
-	if (bra < 0 ||
-	    bra > ket ||
-	    ket > limit)
-	{
-	    throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
-	// FIXME: report error somehow.
-	/*
-	    fprintf(stderr, "faulty slice operation:\n");
-	    debug(z, -1, 0);
-	    exit(1);
-	    */
-	}
-    }
-
-    protected void slice_from(CharSequence s)
-    {
-	slice_check();
-	replace_s(bra, ket, s);
-    }
- 
-    protected void slice_del()
-    {
-	slice_from((CharSequence)"");
-    }
-
-    protected void insert(int c_bra, int c_ket, CharSequence s)
-    {
-	int adjustment = replace_s(c_bra, c_ket, s);
-	if (c_bra <= bra) bra += adjustment;
-	if (c_bra <= ket) ket += adjustment;
+  protected int replace_s(int c_bra, int c_ket, CharSequence s) {
+    final int adjustment = s.length() - (c_ket - c_bra);
+    final int newLength = limit + adjustment;
+    //resize if necessary
+    if (newLength > current.length) {
+      char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
+      System.arraycopy(current, 0, newBuffer, 0, limit);
+      current = newBuffer;
+    }
+    // if the substring being replaced is longer or shorter than the
+    // replacement, need to shift things around
+    if (adjustment != 0 && c_ket < limit) {
+      System.arraycopy(current, c_ket, current, c_bra + s.length(),
+          limit - c_ket);
+    }
+    // insert the replacement text
+    // Note: s.getChars(0, s.length(), current, c_bra) would be faster,
+    // but we would have to duplicate this method for both String and StringBuilder
+    for (int i = 0; i < s.length(); i++)
+      current[c_bra + i] = s.charAt(i);
+
+    limit += adjustment;
+    if (cursor >= c_ket) cursor += adjustment;
+    else if (cursor > c_bra) cursor = c_bra;
+    return adjustment;
+  }
+
+  protected void slice_check() {
+    if (bra < 0 ||
+        bra > ket ||
+        ket > limit) {
+      throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit);
+      // FIXME: report error somehow.
+      /*
+      fprintf(stderr, "faulty slice operation:\n");
+      debug(z, -1, 0);
+      exit(1);
+      */
+    }
+  }
+
+  protected void slice_from(CharSequence s) {
+    slice_check();
+    replace_s(bra, ket, s);
+  }
+
+  protected void slice_del() {
+    slice_from((CharSequence) "");
+  }
+
+  protected void insert(int c_bra, int c_ket, CharSequence s)
+    {
+      int adjustment = replace_s(c_bra, c_ket, s);
+      if (c_bra <= bra) bra += adjustment;
+      if (c_bra <= ket) ket += adjustment;
     }
 
     /* Copy the slice into the supplied StringBuffer */
     protected StringBuilder slice_to(StringBuilder s)
     {
-	slice_check();
-	int len = ket - bra;
-	s.setLength(0);
-	s.append(current, bra, len);
-	return s;
+      slice_check();
+      int len = ket - bra;
+      s.setLength(0);
+      s.append(current, bra, len);
+      return s;
     }
 
     protected StringBuilder assign_to(StringBuilder s)
     {
-	s.setLength(0);
-	s.append(current, 0, limit);
-	return s;
+      s.setLength(0);
+      s.append(current, 0, limit);
+      return s;
     }
 
 /*
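
The reindentation above covers the Snowball runtime helpers; the buffer arithmetic in replace_s() is the piece that slice_from()/slice_del() build on. As a quick illustration, the standalone sketch below (plain Java, not the Lucene class itself; it drops the cursor/bra/ket bookkeeping and substitutes a simple doubling strategy for ArrayUtil.oversize) reproduces that arithmetic on a small buffer:

public class ReplaceSketch {
  static char[] current = "boataria".toCharArray();
  static int limit = current.length;

  // Mirrors replace_s(): replace current[c_bra..c_ket) with s, shift the tail, return the adjustment.
  static int replaceS(int c_bra, int c_ket, CharSequence s) {
    final int adjustment = s.length() - (c_ket - c_bra);
    final int newLength = limit + adjustment;
    if (newLength > current.length) {                      // grow the buffer if needed
      char[] newBuffer = new char[Math.max(newLength, current.length * 2)];
      System.arraycopy(current, 0, newBuffer, 0, limit);
      current = newBuffer;
    }
    if (adjustment != 0 && c_ket < limit) {                // shift the unchanged tail left or right
      System.arraycopy(current, c_ket, current, c_bra + s.length(), limit - c_ket);
    }
    for (int i = 0; i < s.length(); i++) {                 // copy in the replacement text
      current[c_bra + i] = s.charAt(i);
    }
    limit += adjustment;
    return adjustment;
  }

  public static void main(String[] args) {
    replaceS(4, 8, "");                                    // delete the "aria" suffix
    System.out.println(new String(current, 0, limit));     // prints "boat"
  }
}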

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianStemmer.java Fri Sep 21 17:21:34 2012
@@ -38,87 +38,87 @@ import org.apache.lucene.analysis.util.C
 public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
   
   public void testWithSnowballExamples() throws Exception {
-	 check("boa", "boa");
-	 check("boainain", "boainain");
-	 check("boas", "boas");
-	 check("bôas", "boas"); // removes diacritic: different from snowball portugese
-	 check("boassu", "boassu");
-	 check("boataria", "boat");
-	 check("boate", "boat");
-	 check("boates", "boat");
-	 check("boatos", "boat");
-	 check("bob", "bob");
-	 check("boba", "bob");
-	 check("bobagem", "bobag");
-	 check("bobagens", "bobagens");
-	 check("bobalhões", "bobalho"); // removes diacritic: different from snowball portugese
-	 check("bobear", "bob");
-	 check("bobeira", "bobeir");
-	 check("bobinho", "bobinh");
-	 check("bobinhos", "bobinh");
-	 check("bobo", "bob");
-	 check("bobs", "bobs");
-	 check("boca", "boc");
-	 check("bocadas", "boc");
-	 check("bocadinho", "bocadinh");
-	 check("bocado", "boc");
-	 check("bocaiúva", "bocaiuv"); // removes diacritic: different from snowball portuguese
-	 check("boçal", "bocal"); // removes diacritic: different from snowball portuguese
-	 check("bocarra", "bocarr");
-	 check("bocas", "boc");
-	 check("bode", "bod");
-	 check("bodoque", "bodoqu");
-	 check("body", "body");
-	 check("boeing", "boeing");
-	 check("boem", "boem");
-	 check("boemia", "boem");
-	 check("boêmio", "boemi"); // removes diacritic: different from snowball portuguese
-	 check("bogotá", "bogot");
-	 check("boi", "boi");
-	 check("bóia", "boi"); // removes diacritic: different from snowball portuguese
-	 check("boiando", "boi");
-	 check("quiabo", "quiab");
-	 check("quicaram", "quic");
-	 check("quickly", "quickly");
-	 check("quieto", "quiet");
-	 check("quietos", "quiet");
-	 check("quilate", "quilat");
-	 check("quilates", "quilat");
-	 check("quilinhos", "quilinh");
-	 check("quilo", "quil");
-	 check("quilombo", "quilomb");
-	 check("quilométricas", "quilometr"); // removes diacritic: different from snowball portuguese
-	 check("quilométricos", "quilometr"); // removes diacritic: different from snowball portuguese
-	 check("quilômetro", "quilometr"); // removes diacritic: different from snowball portoguese
-	 check("quilômetros", "quilometr"); // removes diacritic: different from snowball portoguese
-	 check("quilos", "quil");
-	 check("quimica", "quimic");
-	 check("quilos", "quil");
-	 check("quimica", "quimic");
-	 check("quimicas", "quimic");
-	 check("quimico", "quimic");
-	 check("quimicos", "quimic");
-	 check("quimioterapia", "quimioterap");
-	 check("quimioterápicos", "quimioterap"); // removes diacritic: different from snowball portoguese
-	 check("quimono", "quimon");
-	 check("quincas", "quinc");
-	 check("quinhão", "quinha"); // removes diacritic: different from snowball portoguese
-	 check("quinhentos", "quinhent");
-	 check("quinn", "quinn");
-	 check("quino", "quin");
-	 check("quinta", "quint");
-	 check("quintal", "quintal");
-	 check("quintana", "quintan");
-	 check("quintanilha", "quintanilh");
-	 check("quintão", "quinta"); // removes diacritic: different from snowball portoguese
-	 check("quintessência", "quintessente"); // versus snowball portuguese 'quintessent'
-	 check("quintino", "quintin");
-	 check("quinto", "quint");
-	 check("quintos", "quint");
-	 check("quintuplicou", "quintuplic");
-	 check("quinze", "quinz");
-	 check("quinzena", "quinzen");
-	 check("quiosque", "quiosqu");
+   check("boa", "boa");
+   check("boainain", "boainain");
+   check("boas", "boas");
+   check("bôas", "boas"); // removes diacritic: different from snowball portuguese
+   check("boassu", "boassu");
+   check("boataria", "boat");
+   check("boate", "boat");
+   check("boates", "boat");
+   check("boatos", "boat");
+   check("bob", "bob");
+   check("boba", "bob");
+   check("bobagem", "bobag");
+   check("bobagens", "bobagens");
+   check("bobalhões", "bobalho"); // removes diacritic: different from snowball portuguese
+   check("bobear", "bob");
+   check("bobeira", "bobeir");
+   check("bobinho", "bobinh");
+   check("bobinhos", "bobinh");
+   check("bobo", "bob");
+   check("bobs", "bobs");
+   check("boca", "boc");
+   check("bocadas", "boc");
+   check("bocadinho", "bocadinh");
+   check("bocado", "boc");
+   check("bocaiúva", "bocaiuv"); // removes diacritic: different from snowball portuguese
+   check("boçal", "bocal"); // removes diacritic: different from snowball portuguese
+   check("bocarra", "bocarr");
+   check("bocas", "boc");
+   check("bode", "bod");
+   check("bodoque", "bodoqu");
+   check("body", "body");
+   check("boeing", "boeing");
+   check("boem", "boem");
+   check("boemia", "boem");
+   check("boêmio", "boemi"); // removes diacritic: different from snowball portuguese
+   check("bogotá", "bogot");
+   check("boi", "boi");
+   check("bóia", "boi"); // removes diacritic: different from snowball portuguese
+   check("boiando", "boi");
+   check("quiabo", "quiab");
+   check("quicaram", "quic");
+   check("quickly", "quickly");
+   check("quieto", "quiet");
+   check("quietos", "quiet");
+   check("quilate", "quilat");
+   check("quilates", "quilat");
+   check("quilinhos", "quilinh");
+   check("quilo", "quil");
+   check("quilombo", "quilomb");
+   check("quilométricas", "quilometr"); // removes diacritic: different from snowball portuguese
+   check("quilométricos", "quilometr"); // removes diacritic: different from snowball portuguese
+   check("quilômetro", "quilometr"); // removes diacritic: different from snowball portuguese
+   check("quilômetros", "quilometr"); // removes diacritic: different from snowball portuguese
+   check("quilos", "quil");
+   check("quimica", "quimic");
+   check("quilos", "quil");
+   check("quimica", "quimic");
+   check("quimicas", "quimic");
+   check("quimico", "quimic");
+   check("quimicos", "quimic");
+   check("quimioterapia", "quimioterap");
+   check("quimioterápicos", "quimioterap"); // removes diacritic: different from snowball portuguese
+   check("quimono", "quimon");
+   check("quincas", "quinc");
+   check("quinhão", "quinha"); // removes diacritic: different from snowball portuguese
+   check("quinhentos", "quinhent");
+   check("quinn", "quinn");
+   check("quino", "quin");
+   check("quinta", "quint");
+   check("quintal", "quintal");
+   check("quintana", "quintan");
+   check("quintanilha", "quintanilh");
+   check("quintão", "quinta"); // removes diacritic: different from snowball portuguese
+   check("quintessência", "quintessente"); // versus snowball portuguese 'quintessent'
+   check("quintino", "quintin");
+   check("quinto", "quint");
+   check("quintos", "quint");
+   check("quintuplicou", "quintuplic");
+   check("quinze", "quinz");
+   check("quinzena", "quinzen");
+   check("quiosque", "quiosqu");
   }
   
   public void testNormalization() throws Exception {
@@ -175,4 +175,4 @@ public class TestBrazilianStemmer extend
     };
     checkOneTermReuse(a, "", "");
   }
-}
\ No newline at end of file
+}

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/charfilter/TestMappingCharFilter.java Fri Sep 21 17:21:34 2012
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.CharFi
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
 
 public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@@ -55,6 +56,11 @@ public class TestMappingCharFilter exten
 
     builder.add( "empty", "" );
 
+    // non-BMP character (encoded in Java as a surrogate pair):
+    builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
+
+    builder.add("\uff01", "full-width-exclamation");
+
     normMap = builder.build();
   }
 
@@ -128,6 +134,18 @@ public class TestMappingCharFilter exten
     assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
   }
 
+  public void testNonBMPChar() throws Exception {
+    CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
+  }
+
+  public void testFullWidthChar() throws Exception {
+    CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
+    TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
+    assertTokenStreamContents(ts, new String[]{"full-width-exclamation"}, new int[]{0}, new int[]{1}, 1);
+  }
+
   //
   //                1111111111222
   //      01234567890123456789012
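
As a usage note for the new mappings exercised above: a MappingCharFilter rewrites the character stream before any tokenizer sees it. The sketch below shows the same pattern outside a test; the NormalizeCharMap.Builder type and package locations are inferred from this test file and may differ in other versions.

import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

public class MappingCharFilterSketch {
  public static void main(String[] args) throws Exception {
    // Build the mapping, mirroring the builder.add(...) / builder.build() calls in the test.
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("\uff01", "full-width-exclamation");       // U+FF01 FULLWIDTH EXCLAMATION MARK

    // Wrap the raw Reader; downstream tokenizers only ever see the mapped text.
    Reader filtered = new MappingCharFilter(builder.build(), new StringReader("hello\uff01"));
    StringBuilder out = new StringBuilder();
    for (int ch = filtered.read(); ch != -1; ch = filtered.read()) {
      out.append((char) ch);
    }
    filtered.close();
    System.out.println(out);                               // expected: hellofull-width-exclamation
  }
}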

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Fri Sep 21 17:21:34 2012
@@ -39,6 +39,7 @@ public class CommonGramsFilterTest exten
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     
     CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
+    cgf.reset();
     assertTrue(cgf.incrementToken());
     assertEquals("How", term.toString());
     assertTrue(cgf.incrementToken());
@@ -61,6 +62,7 @@ public class CommonGramsFilterTest exten
     CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
     
     CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
+    nsf.reset();
     assertTrue(nsf.incrementToken());
     assertEquals("How_the", term.toString());
     assertTrue(nsf.incrementToken());
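
The reset() calls added in this and the following test hunks follow the TokenStream consumer contract: reset() must be called before the first incrementToken(). A generic consumption loop (a sketch, not code from this patch) looks like this:

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class ConsumeTokenStream {
  // Drains any TokenStream using the reset / incrementToken / end / close workflow.
  static void printTokens(TokenStream ts) throws IOException {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    try {
      ts.reset();                        // required before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();                          // records the final offset state
    } finally {
      ts.close();
    }
  }
}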

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Fri Sep 21 17:21:34 2012
@@ -235,6 +235,7 @@ public class TestCompoundWordTokenFilter
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
     
     CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
+    tf.reset();
     assertTrue(tf.incrementToken());
     assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
     assertTrue(tf.incrementToken());
@@ -256,6 +257,7 @@ public class TestCompoundWordTokenFilter
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
     MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
+    stream.reset();
     while (stream.incrementToken()) {
       assertTrue("Custom attribute value was lost", retAtt.getRetain());
     }

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Fri Sep 21 17:21:34 2012
@@ -80,6 +80,7 @@ public class TestAnalyzers extends BaseT
 
   void verifyPayload(TokenStream ts) throws IOException {
     PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
+    ts.reset();
     for(byte b=1;;b++) {
       boolean hasNext = ts.incrementToken();
       if (!hasNext) break;

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestFactories.java Fri Sep 21 17:21:34 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.StringMockResourceLoader;
@@ -114,11 +115,15 @@ public class TestFactories extends BaseT
   }
   
   /** tries to initialize a factory with no arguments */
-  private boolean initialize(AbstractAnalysisFactory factory) {
+  private boolean initialize(AbstractAnalysisFactory factory) throws IOException {
     boolean success = false;
     try {
       factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
       factory.init(Collections.<String,String>emptyMap());
+      if (factory instanceof ResourceLoaderAware) {
+        ResourceLoaderAware resourceLoaderAware = (ResourceLoaderAware) factory;
+        resourceLoaderAware.inform(new ClasspathResourceLoader(factory.getClass()));
+      }
       success = true;
     } catch (IllegalArgumentException ignored) {
       // its ok if we dont provide the right parameters to throw this
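
The inform() call added above is the general pattern for standing up an analysis factory programmatically; restated as a standalone sketch (the method name and the broad throws clause are placeholders, while the calls themselves are the ones used in the test):

import java.util.Collections;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.util.Version;

public class FactoryInitSketch {
  // Initializes a factory with no arguments and, if it needs resources, feeds it a loader.
  static void initialize(AbstractAnalysisFactory factory, Version matchVersion) throws Exception {
    factory.setLuceneMatchVersion(matchVersion);
    factory.init(Collections.<String,String>emptyMap());
    if (factory instanceof ResourceLoaderAware) {
      // e.g. stopword or mapping files resolved against the factory's own classpath
      ((ResourceLoaderAware) factory).inform(new ClasspathResourceLoader(factory.getClass()));
    }
  }
}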

Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Fri Sep 21 17:21:34 2012
@@ -782,31 +782,51 @@ public class TestRandomChains extends Ba
     @Override
     public int read(char[] cbuf, int off, int len) throws IOException {
       readSomething = true;
-      return in.read(cbuf, off, len);
+      return input.read(cbuf, off, len);
     }
 
     @Override
     public int read() throws IOException {
       readSomething = true;
-      return in.read();
+      return input.read();
     }
 
     @Override
     public int read(CharBuffer target) throws IOException {
       readSomething = true;
-      return in.read(target);
+      return input.read(target);
     }
 
     @Override
     public int read(char[] cbuf) throws IOException {
       readSomething = true;
-      return in.read(cbuf);
+      return input.read(cbuf);
     }
 
     @Override
     public long skip(long n) throws IOException {
       readSomething = true;
-      return in.skip(n);
+      return input.skip(n);
+    }
+
+    @Override
+    public void mark(int readAheadLimit) throws IOException {
+      input.mark(readAheadLimit);
+    }
+
+    @Override
+    public boolean markSupported() {
+      return input.markSupported();
+    }
+
+    @Override
+    public boolean ready() throws IOException {
+      return input.ready();
+    }
+
+    @Override
+    public void reset() throws IOException {
+      input.reset();
     }
   }
   

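The hunk above switches the read-tracking wrapper in TestRandomChains to delegate every java.io.Reader method to the wrapped input, including mark/markSupported/ready/reset. A self-contained version of the same idea (class and field names here are placeholders, not the test's own) is:

import java.io.IOException;
import java.io.Reader;

// Wraps another Reader, forwarding every call and remembering whether anything was read.
public class ReadTrackingReader extends Reader {
  private final Reader input;
  boolean readSomething = false;

  public ReadTrackingReader(Reader input) {
    this.input = input;
  }

  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    readSomething = true;
    return input.read(cbuf, off, len);
  }

  @Override
  public void mark(int readAheadLimit) throws IOException { input.mark(readAheadLimit); }

  @Override
  public boolean markSupported() { return input.markSupported(); }

  @Override
  public boolean ready() throws IOException { return input.ready(); }

  @Override
  public void reset() throws IOException { input.reset(); }

  @Override
  public void close() throws IOException { input.close(); }
}
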
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1388574&r1=1388573&r2=1388574&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Fri Sep 21 17:21:34 2012
@@ -66,6 +66,7 @@ public class TestStopAnalyzer extends Ba
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     
+    stream.reset();
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));
@@ -83,6 +84,7 @@ public class TestStopAnalyzer extends Ba
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
 
+    stream.reset();
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));