You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/08/31 00:43:59 UTC
svn commit: r1379200 [2/11] - in /lucene/dev/branches/lucene3312: ./ dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/ dev-tools/maven/ dev-tools/maven/lucene/core/ dev-tools/maven/lucene/test-framework/ dev-tools/scripts/ lucene/ lucene/an...

Modified: lucene/dev/branches/lucene3312/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/MIGRATE.txt?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/lucene3312/lucene/MIGRATE.txt Thu Aug 30 22:43:41 2012
@@ -318,9 +318,9 @@ FieldCache, use them with care! 
 
 The method IndexReader#getSequentialSubReaders() was moved to CompositeReader
 (see LUCENE-2858, LUCENE-3733) and made protected. It is solely used by
-CompositeReader itsself to build its reader tree. To get all atomic leaves
+CompositeReader itself to build its reader tree. To get all atomic leaves
 of a reader, use IndexReader#leaves(), which also provides the doc base
-of each leave. Readers that are already atomic return itsself as leaf with
+of each leaf. Readers that are already atomic return itself as leaf with
 doc base 0. To emulate Lucene 3.x getSequentialSubReaders(),
 use getContext().children().
 
@@ -626,3 +626,8 @@ you can now do this:
   method, StoredFieldVisitor has a needsField method: if that method
   returns true then the field will be loaded and the appropriate
   type-specific method will be invoked with that fields's value.
+
+* LUCENE-4122: Removed the Payload class and replaced with BytesRef.
+  PayloadAttribute's name is unchanged, it just uses the BytesRef
+  class to refer to the payload bytes/start offset/end offset 
+  (or null if there is no payload).

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Thu Aug 30 22:43:41 2012
@@ -94,8 +94,7 @@ public final class KeywordTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     this.done = false;
   }
 }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Thu Aug 30 22:43:41 2012
@@ -78,9 +78,6 @@ public final class PatternTokenizer exte
     if (group >= 0 && group > matcher.groupCount()) {
       throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
     }
-    fillBuffer(str, input);
-    matcher.reset(str);
-    index = 0;
   }
 
   @Override
@@ -136,8 +133,7 @@ public final class PatternTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     fillBuffer(str, input);
     matcher.reset(str);
     index = 0;

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Thu Aug 30 22:43:41 2012
@@ -175,8 +175,7 @@ public final class ClassicTokenizer exte
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu Aug 30 22:43:41 2012
@@ -183,8 +183,7 @@ public final class StandardTokenizer ext
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Thu Aug 30 22:43:41 2012
@@ -162,8 +162,7 @@ public final class UAX29URLEmailTokenize
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(reader);
+  public void reset() throws IOException {
+    scanner.yyreset(input);
   }
 }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Thu Aug 30 22:43:41 2012
@@ -37,6 +37,15 @@ import java.util.regex.PatternSyntaxExce
 /**
  * Abstract parent class for analysis factories {@link TokenizerFactory},
  * {@link TokenFilterFactory} and {@link CharFilterFactory}.
+ * <p>
+ * The typical lifecycle for a factory consumer is:
+ * <ol>
+ *   <li>Create factory via its no-arg constructor
+ *   <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
+ *   <li>Calls {@link #init(Map)} passing arguments as key-value mappings.
+ *   <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
+ *   <li>Consumer calls create() to obtain instances.
+ * </ol>
  */
 public abstract class AbstractAnalysisFactory {
 
@@ -46,6 +55,9 @@ public abstract class AbstractAnalysisFa
   /** the luceneVersion arg */
   protected Version luceneMatchVersion = null;
 
+  /**
+   * Initialize this factory via a set of key-value pairs.
+   */
   public void init(Map<String,String> args) {
     this.args = args;
   }
@@ -104,6 +116,9 @@ public abstract class AbstractAnalysisFa
     return Boolean.parseBoolean(s);
   }
 
+  /**
+   * Compiles a pattern for the value of the specified argument key <code>name</code> 
+   */
   protected Pattern getPattern(String name) {
     try {
       String pat = args.get(name);
@@ -118,6 +133,10 @@ public abstract class AbstractAnalysisFa
     }
   }
 
+  /**
+   * Returns a {@link CharArraySet} from wordFiles, which
+   * can be a comma-separated list of filenames
+   */
   protected CharArraySet getWordSet(ResourceLoader loader,
       String wordFiles, boolean ignoreCase) throws IOException {
     assureMatchVersion();
@@ -137,6 +156,9 @@ public abstract class AbstractAnalysisFa
     return words;
   }
   
+  /**
+   * Returns the resource's lines (with content treated as UTF-8)
+   */
   protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
     return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
   }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Thu Aug 30 22:43:41 2012
@@ -78,7 +78,8 @@ public abstract class CharTokenizer exte
     charUtils = CharacterUtils.getInstance(matchVersion);
   }
   
-  private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+  // note: bufferIndex is -1 here so that consumers that don't call reset() hit a best-effort AIOOBE (ArrayIndexOutOfBoundsException)
+  private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
   private static final int MAX_WORD_LEN = 255;
   private static final int IO_BUFFER_SIZE = 4096;
   
@@ -162,8 +163,7 @@ public abstract class CharTokenizer exte
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     bufferIndex = 0;
     offset = 0;
     dataLen = 0;

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Thu Aug 30 22:43:41 2012
@@ -27,5 +27,9 @@ import java.io.IOException;
  */
 public interface ResourceLoaderAware {
 
+  /**
+   * Initializes this component with the provided ResourceLoader
+   * (used for loading classes, files, etc).
+   */
   void inform(ResourceLoader loader) throws IOException;
 }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java Thu Aug 30 22:43:41 2012
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis.util;
 
 /** Some commonly-used stemming functions */
 public class StemmerUtil {
+  /** no instance */
+  private StemmerUtil() {}
+
   /**
    * Returns true if the character array starts with the suffix.
    * 

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Thu Aug 30 22:43:41 2012
@@ -36,7 +36,10 @@ import org.apache.lucene.util.Version;
  */
 public class WordlistLoader {
   
-  private static final int INITITAL_CAPACITY = 16;
+  private static final int INITIAL_CAPACITY = 16;
+  
+  /** no instance */
+  private WordlistLoader() {}
   
   /**
    * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
@@ -74,7 +77,7 @@ public class WordlistLoader {
    * @return A {@link CharArraySet} with the reader's words
    */
   public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException {
-    return getWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+    return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
   }
 
   /**
@@ -89,7 +92,7 @@ public class WordlistLoader {
    * @return A CharArraySet with the reader's words
    */
   public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException {
-    return getWordSet(reader, comment, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+    return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
   }
 
   /**
@@ -171,7 +174,7 @@ public class WordlistLoader {
    * @return A {@link CharArraySet} with the reader's words
    */
   public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException {
-    return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+    return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
   }
 
 

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Thu Aug 30 22:43:41 2012
@@ -318,19 +318,13 @@ public final class WikipediaTokenizer ex
   */
   @Override
   public void reset() throws IOException {
-    super.reset();
+    scanner.yyreset(input);
     tokens = null;
     scanner.reset();
     first = true;
   }
 
   @Override
-  public void setReader(Reader reader) throws IOException {
-    super.setReader(reader);
-    scanner.yyreset(input);
-  }
-
-  @Override
   public void end() {
     // set final offset
     final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Thu Aug 30 22:43:41 2012
@@ -39,6 +39,7 @@ public class CommonGramsFilterTest exten
     CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
     
     CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
+    cgf.reset();
     assertTrue(cgf.incrementToken());
     assertEquals("How", term.toString());
     assertTrue(cgf.incrementToken());
@@ -61,6 +62,7 @@ public class CommonGramsFilterTest exten
     CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
     
     CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
+    nsf.reset();
     assertTrue(nsf.incrementToken());
     assertEquals("How_the", term.toString());
     assertTrue(nsf.incrementToken());

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Thu Aug 30 22:43:41 2012
@@ -235,6 +235,7 @@ public class TestCompoundWordTokenFilter
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
     
     CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
+    tf.reset();
     assertTrue(tf.incrementToken());
     assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
     assertTrue(tf.incrementToken());
@@ -256,6 +257,7 @@ public class TestCompoundWordTokenFilter
         CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
         CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
     MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
+    stream.reset();
     while (stream.incrementToken()) {
       assertTrue("Custom attribute value was lost", retAtt.getRetain());
     }

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Thu Aug 30 22:43:41 2012
@@ -80,6 +80,7 @@ public class TestAnalyzers extends BaseT
 
   void verifyPayload(TokenStream ts) throws IOException {
     PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
+    ts.reset();
     for(byte b=1;;b++) {
       boolean hasNext = ts.incrementToken();
       if (!hasNext) break;

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Thu Aug 30 22:43:41 2012
@@ -66,6 +66,7 @@ public class TestStopAnalyzer extends Ba
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     
+    stream.reset();
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));
@@ -83,6 +84,7 @@ public class TestStopAnalyzer extends Ba
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
 
+    stream.reset();
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse(stopWordsSet.contains(text));

Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Thu Aug 30 22:43:41 2012
@@ -111,6 +111,7 @@ public class TestPatternTokenizer extend
     // assign bogus values
     in.clearAttributes();
     termAtt.setEmpty().append("bogusTerm");
+    in.reset();
     while (in.incrementToken()) {
       if (out.length() > 0)
         out.append(' ');

Modified: lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Thu Aug 30 22:43:41 2012
@@ -45,7 +45,8 @@ public final class ICUTokenizer extends 
   /** true length of text in the buffer */
   private int length = 0; 
   /** length in buffer that can be evaluated safely, up to a safe end point */
-  private int usableLength = 0; 
+  // note: usableLength is -1 here so that consumers that don't call reset() hit a best-effort AIOOBE (ArrayIndexOutOfBoundsException)
+  private int usableLength = -1; 
   /** accumulated offset of previous buffers for this reader, for offsetAtt */
   private int offset = 0; 
 
@@ -101,12 +102,6 @@ public final class ICUTokenizer extends 
     breaker.setText(buffer, 0, 0);
     length = usableLength = offset = 0;
   }
-
-  @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    reset();
-  }
   
   @Override
   public void end() {

Modified: lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu Aug 30 22:43:41 2012
@@ -245,14 +245,8 @@ public final class JapaneseTokenizer ext
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    buffer.reset(input);
-  }
-
-  @Override
   public void reset() throws IOException {
-    super.reset();
+    buffer.reset(input);
     resetState();
   }
 

Modified: lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Thu Aug 30 22:43:41 2012
@@ -112,17 +112,10 @@ public final class SentenceTokenizer ext
 
   @Override
   public void reset() throws IOException {
-    super.reset();
     tokenStart = tokenEnd = 0;
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
-    reset();
-  }
-
-  @Override
   public void end() {
     // set final offset
     final int finalOffset = correctOffset(tokenEnd);

Modified: lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java Thu Aug 30 22:43:41 2012
@@ -220,7 +220,7 @@ public class Row {
    * Character.
    * 
    * @param way the Character associated with the desired Cell
-   * @return the reference, or -1 if the Cell is <tt>null,/tt>
+   * @return the reference, or -1 if the Cell is <tt>null</tt>
    */
   public int getRef(Character way) {
     Cell c = at(way);

Modified: lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu Aug 30 22:43:41 2012
@@ -80,8 +80,7 @@ public abstract class BaseUIMATokenizer 
   }
 
   @Override
-  public void setReader(Reader input) throws IOException {
-    super.setReader(input);
+  public void reset() throws IOException {
     iterator = null;
   }
 

Modified: lucene/dev/branches/lucene3312/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/build.xml?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/build.xml Thu Aug 30 22:43:41 2012
@@ -248,6 +248,10 @@
       <!-- spatial: problems -->
       <check-missing-javadocs dir="build/docs/suggest" level="class"/>
       <check-missing-javadocs dir="build/docs/test-framework" level="class"/>
+
+      <!-- too much to fix for now, but enforce full javadocs for key packages -->
+      <check-missing-javadocs dir="build/docs/core/org/apache/lucene/analysis" level="method"/>
+      <check-missing-javadocs dir="build/docs/core/org/apache/lucene/document" level="method"/>
     </sequential>
   </target>
   
@@ -452,16 +456,6 @@
     <sign-artifacts-macro artifacts.dir="${dist.dir}"/>
   </target>
 
-  <!-- ================================================================== -->
-  <!-- Build the JavaCC files into the source tree                        -->
-  <!-- ================================================================== -->
-
-  <target name="javacc" depends="javacc-check">
-  	<subant target="javacc" failonerror="true" inheritall="false">
-  	  <fileset dir="${common.dir}/queryparser" includes="build.xml"/>
-  	</subant>
-  </target>
-
   <target name="build-modules" depends="compile-test"
           description="Builds all additional modules and their tests">
     <modules-crawl target="build-artifacts-and-tests"/>
@@ -480,24 +474,6 @@
     <modules-crawl target="test" failonerror="true"/>
   </target>
 
-  <!--
-   compile changes.txt into an html file
-   -->
-  <macrodef name="build-changes">
-    <attribute name="changes.src.dir" default="${changes.src.dir}"/>
-    <attribute name="changes.target.dir" default="${changes.target.dir}"/>
-    <sequential>
-      <mkdir dir="@{changes.target.dir}"/>
-      <exec executable="perl" input="CHANGES.txt" output="@{changes.target.dir}/Changes.html"
-            failonerror="true" logError="true">
-        <arg value="@{changes.src.dir}/changes2html.pl"/>
-      </exec>
-      <copy todir="@{changes.target.dir}">
-        <fileset dir="@{changes.src.dir}" includes="*.css"/>
-      </copy>
-    </sequential>
-  </macrodef>
-
   <target name="changes-to-html">
     <build-changes changes.src.dir="${changes.src.dir}" changes.target.dir="${changes.target.dir}" />
   </target>

Modified: lucene/dev/branches/lucene3312/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/common-build.xml?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/common-build.xml Thu Aug 30 22:43:41 2012
@@ -173,7 +173,6 @@
   <property name="m2.repository.url" value="file://${maven.dist.dir}"/>
   <property name="m2.repository.private.key" value="${user.home}/.ssh/id_dsa"/>
 
-  <property name="javacc.home" location="${common.dir}"/>
   <property name="jflex.home" location="${common.dir}"/>
 
   <path id="jflex.classpath">
@@ -185,12 +184,6 @@
     </fileset>
   </path>
 
-  <path id="javacc.classpath">
-    <fileset dir="${javacc.home}/">
-      <include name="bin/lib/*.jar"/>
-    </fileset>
-  </path>
-
   <property name="backwards.dir" location="backwards"/>
   <property name="build.dir.backwards" location="${build.dir}/backwards"/>
 
@@ -261,23 +254,6 @@
     <delete file="velocity.log"/>
   </target>
 
-  <!-- TODO: maybe make JavaCC checking specific to only the projects
-             that use it (Lucene core and queryparsers)
-  -->
-  <target name="javacc-uptodate-check">
-    <uptodate property="javacc.files.uptodate">
-      <srcfiles dir="${src.dir}" includes="**/*.jj" />
-      <mapper type="glob" from="*.jj" to="*.java"/>
-    </uptodate>
-  </target>
-
-  <target name="javacc-notice" depends="javacc-uptodate-check" unless="javacc.files.uptodate">
-    <echo>
-      One or more of the JavaCC .jj files is newer than its corresponding
-      .java file.  Run the "javacc" target to regenerate the artifacts.
-    </echo>
-  </target>
-
   <target name="init" depends="resolve">
     <!-- currently empty -->
   </target>
@@ -391,36 +367,6 @@
     </echo>
   </target>
 
-  <target name="javacc-check">
-    <available property="javacc.present" classname="org.javacc.parser.Main">
-      <classpath refid="javacc.classpath"/>
-    </available>
-    <fail unless="javacc.present">
-      ##################################################################
-      JavaCC not found.
-      JavaCC Home: ${javacc.home}
-
-      Please download and install JavaCC 4.1 from:
-
-      &lt;http://javacc.dev.java.net&gt;
-
-      Then, create a build.properties file either in your home
-      directory, or within the Lucene directory and set the javacc.home
-      property to the path where JavaCC is installed. For example,
-      if you installed JavaCC in /usr/local/java/javacc-4.1, then set the
-      javacc.home property to:
-
-      javacc.home=/usr/local/java/javacc-4.1
-
-      If you get an error like the one below, then you have not installed
-      things correctly. Please check all your paths and try again.
-
-      java.lang.NoClassDefFoundError: org.javacc.parser.Main
-      ##################################################################
-    </fail>
-
-  </target>
-	
   <target name="jflex-check">
     <available property="jflex.present" classname="jflex.anttask.JFlexTask">
       <classpath refid="jflex.classpath"/>
@@ -508,6 +454,9 @@
     <attribute name="spec.version"/>
     <attribute name="manifest.file" default="${manifest.file}"/>
     <sequential>
+      <!-- If possible, include the svnversion -->
+      <exec dir="." executable="${svnversion.exe}" outputproperty="svnversion" failifexecutionfails="false"/>
+
       <manifest file="@{manifest.file}">
         <!--
         http://java.sun.com/j2se/1.5.0/docs/guide/jar/jar.html#JAR%20Manifest
@@ -558,12 +507,6 @@
     <attribute name="manifest.file" default="${manifest.file}"/>
     <element name="nested" optional="true" implicit="true"/>
     <sequential>
-      <!-- If possible, include the svnversion -->
-      <exec dir="." executable="${svnversion.exe}"
-            outputproperty="svnversion" failifexecutionfails="false">
-        <arg value="."/>
-      </exec>
-      
       <build-manifest title="@{title}"
                       implementation.title="@{implementation.title}"
                       spec.version="@{spec.version}"
@@ -850,7 +793,14 @@
             <sysproperty key="tests.multiplier" value="@{tests.multiplier}"/>
             
             <!-- Temporary directory in the cwd. -->
-            <sysproperty key="tempDir" value="."/>
+            <sysproperty key="tempDir" value="." />
+            <sysproperty key="java.io.tmpdir" value="." />
+
+            <!-- Restrict access to certain Java features and install security manager: -->
+            <sysproperty key="tests.sandbox.dir" value="${build.dir}" />
+            <sysproperty key="clover.db.dir" value="${clover.db.dir}" />
+            <sysproperty key="java.security.manager" value="java.lang.SecurityManager" />
+            <sysproperty key="java.security.policy" value="${common.dir}/tools/junit4/tests.policy" />
 
             <sysproperty key="lucene.version" value="${dev.version}"/>
 
@@ -1381,31 +1331,11 @@ ${tests-output}/junit4-*.suites     - pe
         <!-- <compilerarg line="-Xmaxwarns 10000000"/>
         <compilerarg line="-Xmaxerrs 10000000"/> -->
         <!-- for generics in Java 1.5: -->
-	<compilerarg line="${javac.args}"/>
+        <compilerarg line="${javac.args}"/>
       </javac>
     </sequential>
   </macrodef>
 
-  <macrodef name="invoke-javacc">
-    <attribute name="target"/>
-    <attribute name="outputDir"/>
-    <sequential>
-      <mkdir dir="@{outputDir}"/>
-      <javacc
-          target="@{target}"
-          outputDirectory="@{outputDir}"
-          debugTokenManager="${javacc.debug.tokenmgr}"
-          debugParser="${javacc.debug.parser}"
-          debuglookahead="${javacc.debug.lookahead}"
-          javacchome="${javacc.home}"
-          jdkversion="${javac.source}"
-      />
-      <fixcrlf srcdir="@{outputDir}" includes="*.java" encoding="UTF-8">
-        <containsregexp expression="Generated.*By.*JavaCC"/>
-      </fixcrlf>
-    </sequential>
-  </macrodef>
-
   <property name="failonjavadocwarning" value="true"/>
   <macrodef name="invoke-javadoc">
     <element name="sources" optional="yes"/>
@@ -1547,10 +1477,10 @@ ${tests-output}/junit4-*.suites     - pe
             description="Populates properties svn.URL and svn.Revision using 'svn info'.">
     <attribute name="directory"/>
     <sequential>
-      <exec dir="." executable="${svnversion.exe}" outputproperty="svn.ver"/>
+      <exec dir="@{directory}" executable="${svnversion.exe}" outputproperty="svn.ver"/>
       <fail message="A subversion checkout is required for this target">
         <condition>
-          <equals arg1="${svn.ver}" arg2="exported"/>
+          <matches pattern="(exported|unversioned.*)" string="${svn.ver}" casesensitive="false"/>
         </condition>
       </fail>
       <exec dir="@{directory}" executable="${svn.exe}" outputproperty="svn.info" failonerror="true">
@@ -1697,7 +1627,7 @@ ${tests-output}/junit4-*.suites     - pe
     <element name="nested" optional="false" implicit="true"/>
     <sequential>
       <copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
-        preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8"
+        preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8" taskname="pegdown"
       >
         <filterchain>
           <tokenfilter>
@@ -1757,4 +1687,22 @@ ${tests-output}/junit4-*.suites     - pe
      </sequential>
   </macrodef>
 
+  <!--
+   compile changes.txt into an html file
+   -->
+  <macrodef name="build-changes">
+    <attribute name="changes.src.dir" default="${changes.src.dir}"/>
+    <attribute name="changes.target.dir" default="${changes.target.dir}"/>
+    <sequential>
+      <mkdir dir="@{changes.target.dir}"/>
+      <exec executable="perl" input="CHANGES.txt" output="@{changes.target.dir}/Changes.html"
+            failonerror="true" logError="true">
+        <arg value="@{changes.src.dir}/changes2html.pl"/>
+      </exec>
+      <copy todir="@{changes.target.dir}">
+        <fileset dir="@{changes.src.dir}" includes="*.css"/>
+      </copy>
+    </sequential>
+  </macrodef>
+
 </project>

Modified: lucene/dev/branches/lucene3312/lucene/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/build.xml?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/core/build.xml Thu Aug 30 22:43:41 2012
@@ -38,8 +38,6 @@
     <pathelement path="${java.class.path}"/>
   </path>
 
-  <target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
-  
   <target name="test-core" depends="common.test"/>
 
   <target name="javadocs-core" depends="javadocs"/>

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java Thu Aug 30 22:43:41 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.util.CloseableThreadLocal;
 
+import java.io.Closeable;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.HashMap;
@@ -67,14 +68,26 @@ import java.util.Map;
  *       Analysis integration with Apache UIMA. 
  * </ul>
  */
-public abstract class Analyzer {
+public abstract class Analyzer implements Closeable {
 
   private final ReuseStrategy reuseStrategy;
 
+  /**
+   * Create a new Analyzer, reusing the same set of components per-thread
+   * across calls to {@link #tokenStream(String, Reader)}. 
+   */
   public Analyzer() {
     this(new GlobalReuseStrategy());
   }
 
+  /**
+   * Expert: create a new Analyzer with a custom {@link ReuseStrategy}.
+   * <p>
+   * NOTE: if you just want to reuse on a per-field basis, it's easier to
+   * use a subclass of {@link AnalyzerWrapper} such as 
+   * <a href="{@docRoot}/../analyzers-common/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.html">
+   * PerFieldAnalyzerWrapper</a> instead.
+   */
   public Analyzer(ReuseStrategy reuseStrategy) {
     this.reuseStrategy = reuseStrategy;
   }
@@ -93,20 +106,25 @@ public abstract class Analyzer {
       Reader reader);
 
   /**
-   * Creates a TokenStream that is allowed to be re-use from the previous time
-   * that the same thread called this method.  Callers that do not need to use
-   * more than one TokenStream at the same time from this analyzer should use
-   * this method for better performance.
+   * Returns a TokenStream suitable for <code>fieldName</code>, tokenizing
+   * the contents of <code>reader</code>.
    * <p>
    * This method uses {@link #createComponents(String, Reader)} to obtain an
    * instance of {@link TokenStreamComponents}. It returns the sink of the
    * components and stores the components internally. Subsequent calls to this
    * method will reuse the previously stored components after resetting them
    * through {@link TokenStreamComponents#setReader(Reader)}.
-   * </p>
+   * <p>
+   * <b>NOTE:</b> After calling this method, the consumer must follow the 
+   * workflow described in {@link TokenStream} to properly consume its contents.
+   * See the {@link org.apache.lucene.analysis Analysis package documentation} for
+   * some examples demonstrating this.
    * 
    * @param fieldName the name of the field the created TokenStream is used for
    * @param reader the reader the streams source reads from
+   * @return TokenStream for iterating the analyzed content of <code>reader</code>
+   * @throws AlreadyClosedException if the Analyzer is closed.
+   * @throws IOException if an i/o error occurs.
    */
   public final TokenStream tokenStream(final String fieldName,
                                        final Reader reader) throws IOException {
@@ -123,6 +141,13 @@ public abstract class Analyzer {
   
   /**
    * Override this if you want to add a CharFilter chain.
+   * <p>
+   * The default implementation returns <code>reader</code>
+   * unchanged.
+   * 
+   * @param fieldName IndexableField name being indexed
+   * @param reader original Reader
+   * @return reader, optionally decorated with CharFilter(s)
    */
   protected Reader initReader(String fieldName, Reader reader) {
     return reader;
@@ -139,7 +164,8 @@ public abstract class Analyzer {
    * exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
    *
    * @param fieldName IndexableField name being indexed.
-   * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+   * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}.
+   *         This value must be {@code >= 0}.
    */
   public int getPositionIncrementGap(String fieldName) {
     return 0;
@@ -152,7 +178,8 @@ public abstract class Analyzer {
    * produced at least one token for indexing.
    *
    * @param fieldName the field just indexed
-   * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+   * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}.
+   *         This value must be {@code >= 0}.
    */
   public int getOffsetGap(String fieldName) {
     return 1;
@@ -171,7 +198,14 @@ public abstract class Analyzer {
    * {@link Analyzer#tokenStream(String, Reader)}.
    */
   public static class TokenStreamComponents {
+    /**
+     * Original source of the tokens.
+     */
     protected final Tokenizer source;
+    /**
+     * Sink tokenstream, such as the outer tokenfilter decorating
+     * the chain. This can be the source if there are no filters.
+     */
     protected final TokenStream sink;
 
     /**
@@ -235,10 +269,13 @@ public abstract class Analyzer {
    * Strategy defining how TokenStreamComponents are reused per call to
    * {@link Analyzer#tokenStream(String, java.io.Reader)}.
    */
-  public static abstract class ReuseStrategy {
+  public static abstract class ReuseStrategy implements Closeable {
 
     private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
 
+    /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+    public ReuseStrategy() {}
+
     /**
      * Gets the reusable TokenStreamComponents for the field with the given name
      *
@@ -262,6 +299,7 @@ public abstract class Analyzer {
      * Returns the currently stored value
      *
      * @return Currently stored value or {@code null} if no value is stored
+     * @throws AlreadyClosedException if the ReuseStrategy is closed.
      */
     protected final Object getStoredValue() {
       try {
@@ -279,6 +317,7 @@ public abstract class Analyzer {
      * Sets the stored value
      *
      * @param storedValue Value to store
+     * @throws AlreadyClosedException if the ReuseStrategy is closed.
      */
     protected final void setStoredValue(Object storedValue) {
       try {
@@ -296,8 +335,10 @@ public abstract class Analyzer {
      * Closes the ReuseStrategy, freeing any resources
      */
     public void close() {
-      storedValue.close();
-      storedValue = null;
+      if (storedValue != null) {
+        storedValue.close();
+        storedValue = null;
+      }
     }
   }
 
@@ -306,17 +347,16 @@ public abstract class Analyzer {
    * every field.
    */
   public final static class GlobalReuseStrategy extends ReuseStrategy {
+    
+    /** Creates a new instance, with empty per-thread values */
+    public GlobalReuseStrategy() {}
 
-    /**
-     * {@inheritDoc}
-     */
+    @Override
     public TokenStreamComponents getReusableComponents(String fieldName) {
       return (TokenStreamComponents) getStoredValue();
     }
 
-    /**
-     * {@inheritDoc}
-     */
+    @Override
     public void setReusableComponents(String fieldName, TokenStreamComponents components) {
       setStoredValue(components);
     }
@@ -328,19 +368,18 @@ public abstract class Analyzer {
    */
   public static class PerFieldReuseStrategy extends ReuseStrategy {
 
-    /**
-     * {@inheritDoc}
-     */
+    /** Creates a new instance, with empty per-thread-per-field values */
+    public PerFieldReuseStrategy() {}
+
     @SuppressWarnings("unchecked")
+    @Override
     public TokenStreamComponents getReusableComponents(String fieldName) {
       Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
       return componentsPerField != null ? componentsPerField.get(fieldName) : null;
     }
 
-    /**
-     * {@inheritDoc}
-     */
     @SuppressWarnings("unchecked")
+    @Override
     public void setReusableComponents(String fieldName, TokenStreamComponents components) {
       Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
       if (componentsPerField == null) {

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java Thu Aug 30 22:43:41 2012
@@ -61,25 +61,16 @@ public abstract class AnalyzerWrapper ex
    */
   protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
 
-  /**
-   * {@inheritDoc}
-   */
   @Override
   protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
     return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
   }
 
-  /**
-   * {@inheritDoc}
-   */
   @Override
   public final int getPositionIncrementGap(String fieldName) {
     return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
   }
 
-  /**
-   * {@inheritDoc}
-   */
   @Override
   public final int getOffsetGap(String fieldName) {
     return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName);

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java Thu Aug 30 22:43:41 2012
@@ -38,6 +38,11 @@ public final class CachingTokenFilter ex
   private Iterator<AttributeSource.State> iterator = null; 
   private AttributeSource.State finalState;
   
+  /**
+   * Create a new CachingTokenFilter around <code>input</code>,
+   * caching its token attributes, which can be replayed again
+   * after a call to {@link #reset()}.
+   */
   public CachingTokenFilter(TokenStream input) {
     super(input);
   }
@@ -67,6 +72,13 @@ public final class CachingTokenFilter ex
     }
   }
 
+  /**
+   * Rewinds the iterator to the beginning of the cached list.
+   * <p>
+   * Note that this does not call reset() on the wrapped tokenstream ever, even
+   * the first time. You should reset() the inner tokenstream before wrapping
+   * it with CachingTokenFilter.
+   */
   @Override
   public void reset() {
     if(cache != null) {

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java Thu Aug 30 22:43:41 2012
@@ -33,6 +33,9 @@ import java.io.Reader;
  * You can optionally provide more efficient implementations of additional methods 
  * like {@link #read()}, {@link #read(char[])}, {@link #read(java.nio.CharBuffer)},
  * but this is not required.
+ * <p>
+ * For examples and integration with {@link Analyzer}, see the 
+ * {@link org.apache.lucene.analysis Analysis package documentation}.
  */
 // the way java.io.FilterReader should work!
 public abstract class CharFilter extends Reader {
@@ -52,6 +55,10 @@ public abstract class CharFilter extends
   
   /** 
    * Closes the underlying input stream.
+   * <p>
+   * <b>NOTE:</b> 
+   * The default implementation closes the input Reader, so
+   * be sure to call <code>super.close()</code> when overriding this method.
    */
   @Override
   public void close() throws IOException {

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java Thu Aug 30 22:43:41 2012
@@ -144,6 +144,12 @@ public final class NumericTokenStream ex
     private long value = 0L;
     private int valueSize = 0, shift = 0, precisionStep = 0;
     private BytesRef bytes = new BytesRef();
+    
+    /** 
+     * Creates, but does not yet initialize this attribute instance
+     * @see #init(long, int, int, int)
+     */
+    public NumericTermAttributeImpl() {}
 
     public BytesRef getBytesRef() {
       return bytes;

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java Thu Aug 30 22:43:41 2012
@@ -176,8 +176,8 @@ public class Token extends CharTermAttri
    *  instead use the char[] termBuffer methods to set the
    *  term text.
    *  @param text term text
-   *  @param start start offset
-   *  @param end end offset
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
    */
   public Token(String text, int start, int end) {
     checkOffsets(start, end);
@@ -191,8 +191,8 @@ public class Token extends CharTermAttri
    *  speed you should instead use the char[] termBuffer
    *  methods to set the term text.
    *  @param text term text
-   *  @param start start offset
-   *  @param end end offset
+   *  @param start start offset in the source text
+   *  @param end end offset in the source text
    *  @param typ token type
    */
   public Token(String text, int start, int end, String typ) {
@@ -208,9 +208,9 @@ public class Token extends CharTermAttri
    *  offsets, & type.  <b>NOTE:</b> for better indexing
    *  speed you should instead use the char[] termBuffer
    *  methods to set the term text.
-   * @param text
-   * @param start
-   * @param end
+   * @param text term text
+   * @param start start offset in the source text
+   * @param end end offset in the source text
    * @param flags token type bits
    */
   public Token(String text, int start, int end, int flags) {
@@ -225,11 +225,11 @@ public class Token extends CharTermAttri
    *  Constructs a Token with the given term buffer (offset
    *  & length), start and end
    *  offsets
-   * @param startTermBuffer
-   * @param termBufferOffset
-   * @param termBufferLength
-   * @param start
-   * @param end
+   * @param startTermBuffer buffer containing term text
+   * @param termBufferOffset the index in the buffer of the first character
+   * @param termBufferLength number of valid characters in the buffer
+   * @param start start offset in the source text
+   * @param end end offset in the source text
    */
   public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
     checkOffsets(start, end);
@@ -238,31 +238,9 @@ public class Token extends CharTermAttri
     endOffset = end;
   }
 
-  /** Set the position increment.  This determines the position of this token
-   * relative to the previous Token in a {@link TokenStream}, used in phrase
-   * searching.
-   *
-   * <p>The default value is one.
-   *
-   * <p>Some common uses for this are:<ul>
-   *
-   * <li>Set it to zero to put multiple terms in the same position.  This is
-   * useful if, e.g., a word has multiple stems.  Searches for phrases
-   * including either stem will match.  In this case, all but the first stem's
-   * increment should be set to zero: the increment of the first instance
-   * should be one.  Repeating a token with an increment of zero can also be
-   * used to boost the scores of matches on that token.
-   *
-   * <li>Set it to values greater than one to inhibit exact phrase matches.
-   * If, for example, one does not want phrases to match across removed stop
-   * words, then one could build a stop word filter that removes stop words and
-   * also sets the increment to the number of stop words removed before each
-   * non-stop word.  Then exact phrase queries will only match when the terms
-   * occur with no intervening stop words.
-   *
-   * </ul>
-   * @param positionIncrement the distance from the prior term
-   * @see org.apache.lucene.index.DocsAndPositionsEnum
+  /**
+   * {@inheritDoc}
+   * @see PositionIncrementAttribute
    */
   public void setPositionIncrement(int positionIncrement) {
     if (positionIncrement < 0)
@@ -271,93 +249,101 @@ public class Token extends CharTermAttri
     this.positionIncrement = positionIncrement;
   }
 
-  /** Returns the position increment of this Token.
-   * @see #setPositionIncrement
+  /**
+   * {@inheritDoc}
+   * @see PositionIncrementAttribute
    */
   public int getPositionIncrement() {
     return positionIncrement;
   }
 
-  /** Set the position length.
-   * @see PositionLengthAttribute */
+  /**
+   * {@inheritDoc}
+   * @see PositionLengthAttribute
+   */
   @Override
   public void setPositionLength(int positionLength) {
     this.positionLength = positionLength;
   }
 
-  /** Get the position length.
-   * @see PositionLengthAttribute */
+  /**
+   * {@inheritDoc}
+   * @see PositionLengthAttribute
+   */
   @Override
   public int getPositionLength() {
     return positionLength;
   }
 
-  /** Returns this Token's starting offset, the position of the first character
-    corresponding to this token in the source text.
-
-    Note that the difference between endOffset() and startOffset() may not be
-    equal to {@link #length}, as the term text may have been altered by a
-    stemmer or some other filter. */
+  /**
+   * {@inheritDoc}
+   * @see OffsetAttribute
+   */
   public final int startOffset() {
     return startOffset;
   }
 
-  /** Returns this Token's ending offset, one greater than the position of the
-    last character corresponding to this token in the source text. The length
-    of the token in the source text is (endOffset - startOffset). */
+  /**
+   * {@inheritDoc}
+   * @see OffsetAttribute
+   */
   public final int endOffset() {
     return endOffset;
   }
 
-  /** Set the starting and ending offset.
-  @see #startOffset() and #endOffset()*/
+  /**
+   * {@inheritDoc}
+   * @see OffsetAttribute
+   */
   public void setOffset(int startOffset, int endOffset) {
     checkOffsets(startOffset, endOffset);
     this.startOffset = startOffset;
     this.endOffset = endOffset;
   }
 
-  /** Returns this Token's lexical type.  Defaults to "word". */
+  /**
+   * {@inheritDoc}
+   * @see TypeAttribute
+   */
   public final String type() {
     return type;
   }
 
-  /** Set the lexical type.
-      @see #type() */
+  /**
+   * {@inheritDoc}
+   * @see TypeAttribute
+   */
   public final void setType(String type) {
     this.type = type;
   }
 
   /**
-   * <p/>
-   *
-   * Get the bitset for any bits that have been set.  This is completely distinct from {@link #type()}, although they do share similar purposes.
-   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
-   *
-   * 
-   * @return The bits
-   * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
+   * {@inheritDoc}
+   * @see FlagsAttribute
    */
   public int getFlags() {
     return flags;
   }
 
   /**
-   * @see #getFlags()
+   * {@inheritDoc}
+   * @see FlagsAttribute
    */
   public void setFlags(int flags) {
     this.flags = flags;
   }
 
   /**
-   * Returns this Token's payload.
-   */ 
+   * {@inheritDoc}
+   * @see PayloadAttribute
+   */
   public BytesRef getPayload() {
     return this.payload;
   }
 
-  /** 
-   * Sets this Token's payload.
+  /**
+   * {@inheritDoc}
+   * @see PayloadAttribute
    */
   public void setPayload(BytesRef payload) {
     this.payload = payload;
@@ -551,8 +537,8 @@ public class Token extends CharTermAttri
 
   /**
    * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
-   * @param prototype
-   * @param newTerm
+   * @param prototype existing Token
+   * @param newTerm new term text
    */
   public void reinit(Token prototype, String newTerm) {
     setEmpty().append(newTerm);
@@ -566,10 +552,10 @@ public class Token extends CharTermAttri
 
   /**
    * Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
-   * @param prototype
-   * @param newTermBuffer
-   * @param offset
-   * @param length
+   * @param prototype existing Token
+   * @param newTermBuffer buffer containing new term text
+   * @param offset the index in the buffer of the first character
+   * @param length number of valid characters in the buffer
    */
   public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
     copyBuffer(newTermBuffer, offset, length);

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java Thu Aug 30 22:43:41 2012
@@ -34,21 +34,37 @@ public abstract class TokenFilter extend
     this.input = input;
   }
   
-  /** Performs end-of-stream operations, if any, and calls then <code>end()</code> on the
-   * input TokenStream.<p/> 
-   * <b>NOTE:</b> Be sure to call <code>super.end()</code> first when overriding this method.*/
+  /** 
+   * {@inheritDoc}
+   * <p> 
+   * <b>NOTE:</b> 
+   * The default implementation chains the call to the input TokenStream, so
+   * be sure to call <code>super.end()</code> first when overriding this method.
+   */
   @Override
   public void end() throws IOException {
     input.end();
   }
   
-  /** Close the input TokenStream. */
+  /**
+   * {@inheritDoc}
+   * <p>
+   * <b>NOTE:</b> 
+   * The default implementation chains the call to the input TokenStream, so
+   * be sure to call <code>super.close()</code> when overriding this method.
+   */
   @Override
   public void close() throws IOException {
     input.close();
   }
 
-  /** Reset the filter as well as the input TokenStream. */
+  /**
+   * {@inheritDoc}
+   * <p>
+   * <b>NOTE:</b> 
+   * The default implementation chains the call to the input TokenStream, so
+   * be sure to call <code>super.reset()</code> when overriding this method.
+   */
   @Override
   public void reset() throws IOException {
     input.reset();

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Thu Aug 30 22:43:41 2012
@@ -170,12 +170,8 @@ public abstract class TokenStream extend
    * This method is called by a consumer before it begins consumption using
    * {@link #incrementToken()}.
    * <p/>
-   * Resets this stream to the beginning.  As all TokenStreams must be reusable,
-   * any implementations which have state that needs to be reset between usages
-   * of the TokenStream, must implement this method. Note that if your TokenStream
-   * caches tokens and feeds them back again after a reset, it is imperative
-   * that you clone the tokens when you store them away (on the first pass) as
-   * well as when you return them (on future passes after {@link #reset()}).
+   * Resets this stream to a clean state. Stateful implementations must implement
+   * this method so that they can be reused, just as if they had been created fresh.
    */
   public void reset() throws IOException {}
   

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Thu Aug 30 22:43:41 2012
@@ -54,7 +54,13 @@ public abstract class Tokenizer extends 
     this.input = input;
   }
   
-  /** By default, closes the input Reader. */
+  /**
+   * {@inheritDoc}
+   * <p>
+   * <b>NOTE:</b> 
+   * The default implementation closes the input Reader, so
+   * be sure to call <code>super.close()</code> when overriding this method.
+   */
   @Override
   public void close() throws IOException {
     if (input != null) {
@@ -76,12 +82,18 @@ public abstract class Tokenizer extends 
     return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
   }
 
-  /** Expert: Reset the tokenizer to a new reader.  Typically, an
+  /** Expert: Set a new reader on the Tokenizer.  Typically, an
    *  analyzer (in its tokenStream method) will use
    *  this to re-use a previously created tokenizer. */
-  public void setReader(Reader input) throws IOException {
+  public final void setReader(Reader input) throws IOException {
     assert input != null: "input must not be null";
     this.input = input;
+    assert setReaderTestPoint();
+  }
+  
+  // only used by assert, for testing
+  boolean setReaderTestPoint() {
+    return true;
   }
 }
 

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html Thu Aug 30 22:43:41 2012
@@ -817,5 +817,30 @@ As a small hint, this is how the new Att
 
   ...
 </pre>
+<h4>Adding a CharFilter chain</h4>
+Analyzers take Java {@link java.io.Reader}s as input. Of course you can wrap your Readers with {@link java.io.FilterReader}s
+to manipulate content, but this would have the big disadvantage that character offsets might be inconsistent with your original
+text.
+<p>
+{@link org.apache.lucene.analysis.CharFilter} is designed to allow you to pre-process input like a FilterReader would, but also
+preserve the original offsets associated with those characters. This way mechanisms like highlighting still work correctly.
+CharFilters can be chained.
+<p>
+Example:
+<pre class="prettyprint">
+public class MyAnalyzer extends Analyzer {
+
+  {@literal @Override}
+  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+    return new TokenStreamComponents(new MyTokenizer(reader));
+  }
+  
+  {@literal @Override}
+  protected Reader initReader(String fieldName, Reader reader) {
+    // wrap the Reader in a CharFilter chain.
+    return new SecondCharFilter(new FirstCharFilter(reader));
+  }
+}
+</pre>
 </body>
 </html>

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java Thu Aug 30 22:43:41 2012
@@ -38,7 +38,11 @@ public interface CharTermAttribute exten
    *  #resizeBuffer(int)} to increase it.  After
    *  altering the buffer be sure to call {@link
    *  #setLength} to record the number of valid
-   *  characters that were placed into the termBuffer. */
+   *  characters that were placed into the termBuffer. 
+   *  <p>
+   *  <b>NOTE</b>: The returned buffer may be larger than
+   *  the valid {@link #length()}.
+   */
   public char[] buffer();
 
   /** Grows the termBuffer to at least size newSize, preserving the

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -26,14 +26,15 @@ import org.apache.lucene.util.AttributeR
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
 
-/**
- * The term text of a Token.
- */
+/** Default implementation of {@link CharTermAttribute}. */
 public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
   private static int MIN_BUFFER_SIZE = 10;
   
   private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
   private int termLength = 0;
+  
+  /** Initialize this attribute with empty term text */
+  public CharTermAttributeImpl() {}
 
   public final void copyBuffer(char[] buffer, int offset, int length) {
     growTermBuffer(length);

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java Thu Aug 30 22:43:41 2012
@@ -22,22 +22,23 @@ import org.apache.lucene.util.Attribute;
 
 /**
  * This attribute can be used to pass different flags down the {@link Tokenizer} chain,
- * eg from one TokenFilter to another one. 
+ * e.g. from one TokenFilter to another one. 
+ * <p>
+ * This is completely distinct from {@link TypeAttribute}, although they do share similar purposes.
+ * The flags can be used to encode information about the token for use by other 
+ * {@link org.apache.lucene.analysis.TokenFilter}s.
  * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
  */
 public interface FlagsAttribute extends Attribute {
   /**
-   * <p/>
-   *
-   * Get the bitset for any bits that have been set.  This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
-   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
-   *
-   *
+   * Get the bitset for any bits that have been set.  
    * @return The bits
+   * @see #setFlags(int)
    */
   public int getFlags();
 
   /**
+   * Set the flags to a new bitset.
    * @see #getFlags()
    */
   public void setFlags(int flags);  

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -19,30 +19,17 @@ package org.apache.lucene.analysis.token
 
 import org.apache.lucene.util.AttributeImpl;
 
-/**
- * This attribute can be used to pass different flags down the tokenizer chain,
- * eg from one TokenFilter to another one. 
- * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
- */
+/** Default implementation of {@link FlagsAttribute}. */
 public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
   private int flags = 0;
   
-  /**
-   * <p/>
-   *
-   * Get the bitset for any bits that have been set.  This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
-   * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
-   *
-   *
-   * @return The bits
-   */
+  /** Initialize this attribute with no bits set */
+  public FlagsAttributeImpl() {}
+  
   public int getFlags() {
     return flags;
   }
 
-  /**
-   * @see #getFlags()
-   */
   public void setFlags(int flags) {
     this.flags = flags;
   }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java Thu Aug 30 22:43:41 2012
@@ -30,20 +30,22 @@ import org.apache.lucene.util.Attribute;
 public interface KeywordAttribute extends Attribute {
 
   /**
-   * Returns <code>true</code> iff the current token is a keyword, otherwise
-   * <code>false</code>/
+   * Returns <code>true</code> if the current token is a keyword, otherwise
+   * <code>false</code>.
    * 
-   * @return <code>true</code> iff the current token is a keyword, otherwise
-   *         <code>false</code>/
+   * @return <code>true</code> if the current token is a keyword, otherwise
+   *         <code>false</code>
+   * @see #setKeyword(boolean)
    */
   public boolean isKeyword();
 
   /**
-   * Marks the current token as keyword iff set to <code>true</code>.
+   * Marks the current token as keyword if set to <code>true</code>.
    * 
    * @param isKeyword
-   *          <code>true</code> iff the current token is a keyword, otherwise
+   *          <code>true</code> if the current token is a keyword, otherwise
    *          <code>false</code>.
+   * @see #isKeyword()
    */
   public void setKeyword(boolean isKeyword);
 }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -17,19 +17,15 @@ package org.apache.lucene.analysis.token
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.util.AttributeImpl;
 
-/**
- *This attribute can be used to mark a token as a keyword. Keyword aware
- * {@link TokenStream}s can decide to modify a token based on the return value
- * of {@link #isKeyword()} if the token is modified. Stemming filters for
- * instance can use this attribute to conditionally skip a term if
- * {@link #isKeyword()} returns <code>true</code>.
- */
+/** Default implementation of {@link KeywordAttribute}. */
 public final class KeywordAttributeImpl extends AttributeImpl implements
     KeywordAttribute {
   private boolean keyword;
+  
+  /** Initialize this attribute with the keyword value as false. */
+  public KeywordAttributeImpl() {}
 
   @Override
   public void clear() {
@@ -57,24 +53,10 @@ public final class KeywordAttributeImpl 
     return keyword == other.keyword;
   }
 
-  /**
-   * Returns <code>true</code> iff the current token is a keyword, otherwise
-   * <code>false</code>/
-   * 
-   * @return <code>true</code> iff the current token is a keyword, otherwise
-   *         <code>false</code>/
-   */
   public boolean isKeyword() {
     return keyword;
   }
 
-  /**
-   * Marks the current token as keyword iff set to <code>true</code>.
-   * 
-   * @param isKeyword
-   *          <code>true</code> iff the current token is a keyword, otherwise
-   *          <code>false</code>.
-   */
   public void setKeyword(boolean isKeyword) {
     keyword = isKeyword;
   }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java Thu Aug 30 22:43:41 2012
@@ -23,22 +23,34 @@ import org.apache.lucene.util.Attribute;
  * The start and end character offset of a Token. 
  */
 public interface OffsetAttribute extends Attribute {
-  /** Returns this Token's starting offset, the position of the first character
-  corresponding to this token in the source text.
-
-  Note that the difference between endOffset() and startOffset() may not be
-  equal to termText.length(), as the term text may have been altered by a
-  stemmer or some other filter. */
+  /** 
+   * Returns this Token's starting offset, the position of the first character
+   * corresponding to this token in the source text.
+   * <p>
+   * Note that the difference between {@link #endOffset()} and <code>startOffset()</code> 
+   * may not be equal to termText.length(), as the term text may have been altered by a
+   * stemmer or some other filter.
+   * @see #setOffset(int, int) 
+   */
   public int startOffset();
 
   
-  /** Set the starting and ending offset.
-    @see #startOffset() and #endOffset()*/
+  /** 
+   * Set the starting and ending offset.
+   * @throws IllegalArgumentException If <code>startOffset</code> or <code>endOffset</code>
+   *         are negative, or if <code>startOffset</code> is greater than 
+   *         <code>endOffset</code>
+   * @see #startOffset()
+   * @see #endOffset()
+   */
   public void setOffset(int startOffset, int endOffset);
   
 
-  /** Returns this Token's ending offset, one greater than the position of the
-  last character corresponding to this token in the source text. The length
-  of the token in the source text is (endOffset - startOffset). */
+  /** 
+   * Returns this Token's ending offset, one greater than the position of the
+   * last character corresponding to this token in the source text. The length
+   * of the token in the source text is (<code>endOffset()</code> - {@link #startOffset()}). 
+   * @see #setOffset(int, int)
+   */
   public int endOffset();
 }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -19,26 +19,18 @@ package org.apache.lucene.analysis.token
 
 import org.apache.lucene.util.AttributeImpl;
 
-/**
- * The start and end character offset of a Token. 
- */
+/** Default implementation of {@link OffsetAttribute}. */
 public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
   private int startOffset;
   private int endOffset;
+  
+  /** Initialize this attribute with startOffset and endOffset of 0. */
+  public OffsetAttributeImpl() {}
 
-  /** Returns this Token's starting offset, the position of the first character
-  corresponding to this token in the source text.
-
-  Note that the difference between endOffset() and startOffset() may not be
-  equal to termText.length(), as the term text may have been altered by a
-  stemmer or some other filter. */
   public int startOffset() {
     return startOffset;
   }
 
-  
-  /** Set the starting and ending offset.
-    @see #startOffset() and #endOffset()*/
   public void setOffset(int startOffset, int endOffset) {
 
     // TODO: we could assert that this is set-once, ie,
@@ -56,10 +48,6 @@ public class OffsetAttributeImpl extends
     this.endOffset = endOffset;
   }
   
-
-  /** Returns this Token's ending offset, one greater than the position of the
-  last character corresponding to this token in the source text. The length
-  of the token in the source text is (endOffset - startOffset). */
   public int endOffset() {
     return endOffset;
   }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java Thu Aug 30 22:43:41 2012
@@ -17,20 +17,34 @@ package org.apache.lucene.analysis.token
  * limitations under the License.
  */
 
+import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * The payload of a Token. 
+ * The payload of a Token.
+ * <p>
+ * The payload is stored in the index at each position, and can
+ * be used to influence scoring when using Payload-based queries 
+ * in the {@link org.apache.lucene.search.payloads} and
+ * {@link org.apache.lucene.search.spans} packages.
+ * <p>
+ * NOTE: because the payload will be stored at each position, it's usually
+ * best to use the minimum number of bytes necessary. Some codec implementations
+ * may optimize payload storage when all payloads have the same length.
+ * 
+ * @see DocsAndPositionsEnum
  */
 public interface PayloadAttribute extends Attribute {
   /**
    * Returns this Token's payload.
+   * @see #setPayload(BytesRef)
    */ 
   public BytesRef getPayload();
 
   /** 
    * Sets this Token's payload.
+   * @see #getPayload()
    */
   public void setPayload(BytesRef payload);
 }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -20,9 +20,7 @@ package org.apache.lucene.analysis.token
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.BytesRef;
 
-/**
- * The payload of a Token.
- */
+/** Default implementation of {@link PayloadAttribute}. */
 public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
   private BytesRef payload;  
   
@@ -38,16 +36,10 @@ public class PayloadAttributeImpl extend
     this.payload = payload;
   }
   
-  /**
-   * Returns this Token's payload.
-   */ 
   public BytesRef getPayload() {
     return this.payload;
   }
 
-  /** 
-   * Sets this Token's payload.
-   */
   public void setPayload(BytesRef payload) {
     this.payload = payload;
   }

Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java Thu Aug 30 22:43:41 2012
@@ -49,11 +49,14 @@ public interface PositionIncrementAttrib
   /** Set the position increment. The default value is one.
    *
    * @param positionIncrement the distance from the prior term
+   * @throws IllegalArgumentException if <code>positionIncrement</code> 
+   *         is negative.
+   * @see #getPositionIncrement()
    */
   public void setPositionIncrement(int positionIncrement);
 
   /** Returns the position increment of this Token.
-   * @see #setPositionIncrement
+   * @see #setPositionIncrement(int)
    */
   public int getPositionIncrement();
 }