You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/08/31 00:43:59 UTC
svn commit: r1379200 [2/11] - in /lucene/dev/branches/lucene3312: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/lucene/core/
dev-tools/maven/lucene/test-framework/ dev-tools/scripts/ lucene/
lucene/an...
Modified: lucene/dev/branches/lucene3312/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/MIGRATE.txt?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/MIGRATE.txt (original)
+++ lucene/dev/branches/lucene3312/lucene/MIGRATE.txt Thu Aug 30 22:43:41 2012
@@ -318,9 +318,9 @@ FieldCache, use them with care!
The method IndexReader#getSequentialSubReaders() was moved to CompositeReader
(see LUCENE-2858, LUCENE-3733) and made protected. It is solely used by
-CompositeReader itsself to build its reader tree. To get all atomic leaves
+CompositeReader itself to build its reader tree. To get all atomic leaves
of a reader, use IndexReader#leaves(), which also provides the doc base
-of each leave. Readers that are already atomic return itsself as leaf with
+of each leaf. Readers that are already atomic return themselves as leaf with
doc base 0. To emulate Lucene 3.x getSequentialSubReaders(),
use getContext().children().
@@ -626,3 +626,8 @@ you can now do this:
method, StoredFieldVisitor has a needsField method: if that method
returns true then the field will be loaded and the appropriate
type-specific method will be invoked with that field's value.
+
+* LUCENE-4122: Removed the Payload class and replaced with BytesRef.
+ PayloadAttribute's name is unchanged, it just uses the BytesRef
+ class to refer to the payload bytes/start offset/end offset
+ (or null if there is no payload).
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java Thu Aug 30 22:43:41 2012
@@ -94,8 +94,7 @@ public final class KeywordTokenizer exte
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
this.done = false;
}
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Thu Aug 30 22:43:41 2012
@@ -78,9 +78,6 @@ public final class PatternTokenizer exte
if (group >= 0 && group > matcher.groupCount()) {
throw new IllegalArgumentException("invalid group specified: pattern only has: " + matcher.groupCount() + " capturing groups");
}
- fillBuffer(str, input);
- matcher.reset(str);
- index = 0;
}
@Override
@@ -136,8 +133,7 @@ public final class PatternTokenizer exte
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
fillBuffer(str, input);
matcher.reset(str);
index = 0;
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java Thu Aug 30 22:43:41 2012
@@ -175,8 +175,7 @@ public final class ClassicTokenizer exte
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(reader);
+ public void reset() throws IOException {
+ scanner.yyreset(input);
}
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java Thu Aug 30 22:43:41 2012
@@ -183,8 +183,7 @@ public final class StandardTokenizer ext
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(reader);
+ public void reset() throws IOException {
+ scanner.yyreset(input);
}
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Thu Aug 30 22:43:41 2012
@@ -162,8 +162,7 @@ public final class UAX29URLEmailTokenize
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(reader);
+ public void reset() throws IOException {
+ scanner.yyreset(input);
}
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Thu Aug 30 22:43:41 2012
@@ -37,6 +37,15 @@ import java.util.regex.PatternSyntaxExce
/**
* Abstract parent class for analysis factories {@link TokenizerFactory},
* {@link TokenFilterFactory} and {@link CharFilterFactory}.
+ * <p>
+ * The typical lifecycle for a factory consumer is:
+ * <ol>
+ * <li>Create factory via its no-arg constructor
+ * <li>Set version emulation by calling {@link #setLuceneMatchVersion(Version)}
+ * <li>Call {@link #init(Map)} passing arguments as key-value mappings.
+ * <li>(Optional) If the factory uses resources such as files, {@link ResourceLoaderAware#inform(ResourceLoader)} is called to initialize those resources.
+ * <li>Consumer calls create() to obtain instances.
+ * </ol>
*/
public abstract class AbstractAnalysisFactory {
@@ -46,6 +55,9 @@ public abstract class AbstractAnalysisFa
/** the luceneVersion arg */
protected Version luceneMatchVersion = null;
+ /**
+ * Initialize this factory via a set of key-value pairs.
+ */
public void init(Map<String,String> args) {
this.args = args;
}
@@ -104,6 +116,9 @@ public abstract class AbstractAnalysisFa
return Boolean.parseBoolean(s);
}
+ /**
+ * Compiles a pattern for the value of the specified argument key <code>name</code>
+ */
protected Pattern getPattern(String name) {
try {
String pat = args.get(name);
@@ -118,6 +133,10 @@ public abstract class AbstractAnalysisFa
}
}
+ /**
+ * Returns a {@link CharArraySet} from wordFiles, which
+ * can be a comma-separated list of filenames
+ */
protected CharArraySet getWordSet(ResourceLoader loader,
String wordFiles, boolean ignoreCase) throws IOException {
assureMatchVersion();
@@ -137,6 +156,9 @@ public abstract class AbstractAnalysisFa
return words;
}
+ /**
+ * Returns the resource's lines (with content treated as UTF-8)
+ */
protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java Thu Aug 30 22:43:41 2012
@@ -78,7 +78,8 @@ public abstract class CharTokenizer exte
charUtils = CharacterUtils.getInstance(matchVersion);
}
- private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+ // note: bufferIndex is -1 here as a best-effort way to trigger an ArrayIndexOutOfBoundsException for consumers that don't call reset()
+ private int offset = 0, bufferIndex = -1, dataLen = 0, finalOffset = 0;
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
@@ -162,8 +163,7 @@ public abstract class CharTokenizer exte
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
bufferIndex = 0;
offset = 0;
dataLen = 0;
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Thu Aug 30 22:43:41 2012
@@ -27,5 +27,9 @@ import java.io.IOException;
*/
public interface ResourceLoaderAware {
+ /**
+ * Initializes this component with the provided ResourceLoader
+ * (used for loading classes, files, etc).
+ */
void inform(ResourceLoader loader) throws IOException;
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java Thu Aug 30 22:43:41 2012
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis.util;
/** Some commonly-used stemming functions */
public class StemmerUtil {
+ /** no instance */
+ private StemmerUtil() {}
+
/**
* Returns true if the character array starts with the suffix.
*
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Thu Aug 30 22:43:41 2012
@@ -36,7 +36,10 @@ import org.apache.lucene.util.Version;
*/
public class WordlistLoader {
- private static final int INITITAL_CAPACITY = 16;
+ private static final int INITIAL_CAPACITY = 16;
+
+ /** no instance */
+ private WordlistLoader() {}
/**
* Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
@@ -74,7 +77,7 @@ public class WordlistLoader {
* @return A {@link CharArraySet} with the reader's words
*/
public static CharArraySet getWordSet(Reader reader, Version matchVersion) throws IOException {
- return getWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+ return getWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
/**
@@ -89,7 +92,7 @@ public class WordlistLoader {
* @return A CharArraySet with the reader's words
*/
public static CharArraySet getWordSet(Reader reader, String comment, Version matchVersion) throws IOException {
- return getWordSet(reader, comment, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+ return getWordSet(reader, comment, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
/**
@@ -171,7 +174,7 @@ public class WordlistLoader {
* @return A {@link CharArraySet} with the reader's words
*/
public static CharArraySet getSnowballWordSet(Reader reader, Version matchVersion) throws IOException {
- return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITITAL_CAPACITY, false));
+ return getSnowballWordSet(reader, new CharArraySet(matchVersion, INITIAL_CAPACITY, false));
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java Thu Aug 30 22:43:41 2012
@@ -318,19 +318,13 @@ public final class WikipediaTokenizer ex
*/
@Override
public void reset() throws IOException {
- super.reset();
+ scanner.yyreset(input);
tokens = null;
scanner.reset();
first = true;
}
@Override
- public void setReader(Reader reader) throws IOException {
- super.setReader(reader);
- scanner.yyreset(input);
- }
-
- @Override
public void end() {
// set final offset
final int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java Thu Aug 30 22:43:41 2012
@@ -39,6 +39,7 @@ public class CommonGramsFilterTest exten
CommonGramsFilter cgf = new CommonGramsFilter(TEST_VERSION_CURRENT, wt, commonWords);
CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class);
+ cgf.reset();
assertTrue(cgf.incrementToken());
assertEquals("How", term.toString());
assertTrue(cgf.incrementToken());
@@ -61,6 +62,7 @@ public class CommonGramsFilterTest exten
CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf);
CharTermAttribute term = wt.addAttribute(CharTermAttribute.class);
+ nsf.reset();
assertTrue(nsf.incrementToken());
assertEquals("How_the", term.toString());
assertTrue(nsf.incrementToken());
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Thu Aug 30 22:43:41 2012
@@ -235,6 +235,7 @@ public class TestCompoundWordTokenFilter
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
+ tf.reset();
assertTrue(tf.incrementToken());
assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
@@ -256,6 +257,7 @@ public class TestCompoundWordTokenFilter
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
MockRetainAttribute retAtt = stream.addAttribute(MockRetainAttribute.class);
+ stream.reset();
while (stream.incrementToken()) {
assertTrue("Custom attribute value was lost", retAtt.getRetain());
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java Thu Aug 30 22:43:41 2012
@@ -80,6 +80,7 @@ public class TestAnalyzers extends BaseT
void verifyPayload(TokenStream ts) throws IOException {
PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
+ ts.reset();
for(byte b=1;;b++) {
boolean hasNext = ts.incrementToken();
if (!hasNext) break;
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Thu Aug 30 22:43:41 2012
@@ -66,6 +66,7 @@ public class TestStopAnalyzer extends Ba
assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
+ stream.reset();
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
@@ -83,6 +84,7 @@ public class TestStopAnalyzer extends Ba
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
+ stream.reset();
while (stream.incrementToken()) {
String text = termAtt.toString();
assertFalse(stopWordsSet.contains(text));
Modified: lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Thu Aug 30 22:43:41 2012
@@ -111,6 +111,7 @@ public class TestPatternTokenizer extend
// assign bogus values
in.clearAttributes();
termAtt.setEmpty().append("bogusTerm");
+ in.reset();
while (in.incrementToken()) {
if (out.length() > 0)
out.append(' ');
Modified: lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizer.java Thu Aug 30 22:43:41 2012
@@ -45,7 +45,8 @@ public final class ICUTokenizer extends
/** true length of text in the buffer */
private int length = 0;
/** length in buffer that can be evaluated safely, up to a safe end point */
- private int usableLength = 0;
+ // note: usableLength is -1 here as a best-effort way to trigger an ArrayIndexOutOfBoundsException for consumers that don't call reset()
+ private int usableLength = -1;
/** accumulated offset of previous buffers for this reader, for offsetAtt */
private int offset = 0;
@@ -101,12 +102,6 @@ public final class ICUTokenizer extends
breaker.setText(buffer, 0, 0);
length = usableLength = offset = 0;
}
-
- @Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- reset();
- }
@Override
public void end() {
Modified: lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java Thu Aug 30 22:43:41 2012
@@ -245,14 +245,8 @@ public final class JapaneseTokenizer ext
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- buffer.reset(input);
- }
-
- @Override
public void reset() throws IOException {
- super.reset();
+ buffer.reset(input);
resetState();
}
Modified: lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java Thu Aug 30 22:43:41 2012
@@ -112,17 +112,10 @@ public final class SentenceTokenizer ext
@Override
public void reset() throws IOException {
- super.reset();
tokenStart = tokenEnd = 0;
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
- reset();
- }
-
- @Override
public void end() {
// set final offset
final int finalOffset = correctOffset(tokenEnd);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/stempel/src/java/org/egothor/stemmer/Row.java Thu Aug 30 22:43:41 2012
@@ -220,7 +220,7 @@ public class Row {
* Character.
*
* @param way the Character associated with the desired Cell
- * @return the reference, or -1 if the Cell is <tt>null,/tt>
+ * @return the reference, or -1 if the Cell is <tt>null</tt>
*/
public int getRef(Character way) {
Cell c = at(way);
Modified: lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/analysis/uima/src/java/org/apache/lucene/analysis/uima/BaseUIMATokenizer.java Thu Aug 30 22:43:41 2012
@@ -80,8 +80,7 @@ public abstract class BaseUIMATokenizer
}
@Override
- public void setReader(Reader input) throws IOException {
- super.setReader(input);
+ public void reset() throws IOException {
iterator = null;
}
Modified: lucene/dev/branches/lucene3312/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/build.xml?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/build.xml Thu Aug 30 22:43:41 2012
@@ -248,6 +248,10 @@
<!-- spatial: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
<check-missing-javadocs dir="build/docs/test-framework" level="class"/>
+
+ <!-- too much to fix for now, but enforce full javadocs for key packages -->
+ <check-missing-javadocs dir="build/docs/core/org/apache/lucene/analysis" level="method"/>
+ <check-missing-javadocs dir="build/docs/core/org/apache/lucene/document" level="method"/>
</sequential>
</target>
@@ -452,16 +456,6 @@
<sign-artifacts-macro artifacts.dir="${dist.dir}"/>
</target>
- <!-- ================================================================== -->
- <!-- Build the JavaCC files into the source tree -->
- <!-- ================================================================== -->
-
- <target name="javacc" depends="javacc-check">
- <subant target="javacc" failonerror="true" inheritall="false">
- <fileset dir="${common.dir}/queryparser" includes="build.xml"/>
- </subant>
- </target>
-
<target name="build-modules" depends="compile-test"
description="Builds all additional modules and their tests">
<modules-crawl target="build-artifacts-and-tests"/>
@@ -480,24 +474,6 @@
<modules-crawl target="test" failonerror="true"/>
</target>
- <!--
- compile changes.txt into an html file
- -->
- <macrodef name="build-changes">
- <attribute name="changes.src.dir" default="${changes.src.dir}"/>
- <attribute name="changes.target.dir" default="${changes.target.dir}"/>
- <sequential>
- <mkdir dir="@{changes.target.dir}"/>
- <exec executable="perl" input="CHANGES.txt" output="@{changes.target.dir}/Changes.html"
- failonerror="true" logError="true">
- <arg value="@{changes.src.dir}/changes2html.pl"/>
- </exec>
- <copy todir="@{changes.target.dir}">
- <fileset dir="@{changes.src.dir}" includes="*.css"/>
- </copy>
- </sequential>
- </macrodef>
-
<target name="changes-to-html">
<build-changes changes.src.dir="${changes.src.dir}" changes.target.dir="${changes.target.dir}" />
</target>
Modified: lucene/dev/branches/lucene3312/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/common-build.xml?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/common-build.xml Thu Aug 30 22:43:41 2012
@@ -173,7 +173,6 @@
<property name="m2.repository.url" value="file://${maven.dist.dir}"/>
<property name="m2.repository.private.key" value="${user.home}/.ssh/id_dsa"/>
- <property name="javacc.home" location="${common.dir}"/>
<property name="jflex.home" location="${common.dir}"/>
<path id="jflex.classpath">
@@ -185,12 +184,6 @@
</fileset>
</path>
- <path id="javacc.classpath">
- <fileset dir="${javacc.home}/">
- <include name="bin/lib/*.jar"/>
- </fileset>
- </path>
-
<property name="backwards.dir" location="backwards"/>
<property name="build.dir.backwards" location="${build.dir}/backwards"/>
@@ -261,23 +254,6 @@
<delete file="velocity.log"/>
</target>
- <!-- TODO: maybe make JavaCC checking specific to only the projects
- that use it (Lucene core and queryparsers)
- -->
- <target name="javacc-uptodate-check">
- <uptodate property="javacc.files.uptodate">
- <srcfiles dir="${src.dir}" includes="**/*.jj" />
- <mapper type="glob" from="*.jj" to="*.java"/>
- </uptodate>
- </target>
-
- <target name="javacc-notice" depends="javacc-uptodate-check" unless="javacc.files.uptodate">
- <echo>
- One or more of the JavaCC .jj files is newer than its corresponding
- .java file. Run the "javacc" target to regenerate the artifacts.
- </echo>
- </target>
-
<target name="init" depends="resolve">
<!-- currently empty -->
</target>
@@ -391,36 +367,6 @@
</echo>
</target>
- <target name="javacc-check">
- <available property="javacc.present" classname="org.javacc.parser.Main">
- <classpath refid="javacc.classpath"/>
- </available>
- <fail unless="javacc.present">
- ##################################################################
- JavaCC not found.
- JavaCC Home: ${javacc.home}
-
- Please download and install JavaCC 4.1 from:
-
- <http://javacc.dev.java.net>
-
- Then, create a build.properties file either in your home
- directory, or within the Lucene directory and set the javacc.home
- property to the path where JavaCC is installed. For example,
- if you installed JavaCC in /usr/local/java/javacc-4.1, then set the
- javacc.home property to:
-
- javacc.home=/usr/local/java/javacc-4.1
-
- If you get an error like the one below, then you have not installed
- things correctly. Please check all your paths and try again.
-
- java.lang.NoClassDefFoundError: org.javacc.parser.Main
- ##################################################################
- </fail>
-
- </target>
-
<target name="jflex-check">
<available property="jflex.present" classname="jflex.anttask.JFlexTask">
<classpath refid="jflex.classpath"/>
@@ -508,6 +454,9 @@
<attribute name="spec.version"/>
<attribute name="manifest.file" default="${manifest.file}"/>
<sequential>
+ <!-- If possible, include the svnversion -->
+ <exec dir="." executable="${svnversion.exe}" outputproperty="svnversion" failifexecutionfails="false"/>
+
<manifest file="@{manifest.file}">
<!--
http://java.sun.com/j2se/1.5.0/docs/guide/jar/jar.html#JAR%20Manifest
@@ -558,12 +507,6 @@
<attribute name="manifest.file" default="${manifest.file}"/>
<element name="nested" optional="true" implicit="true"/>
<sequential>
- <!-- If possible, include the svnversion -->
- <exec dir="." executable="${svnversion.exe}"
- outputproperty="svnversion" failifexecutionfails="false">
- <arg value="."/>
- </exec>
-
<build-manifest title="@{title}"
implementation.title="@{implementation.title}"
spec.version="@{spec.version}"
@@ -850,7 +793,14 @@
<sysproperty key="tests.multiplier" value="@{tests.multiplier}"/>
<!-- Temporary directory in the cwd. -->
- <sysproperty key="tempDir" value="."/>
+ <sysproperty key="tempDir" value="." />
+ <sysproperty key="java.io.tmpdir" value="." />
+
+ <!-- Restrict access to certain Java features and install security manager: -->
+ <sysproperty key="tests.sandbox.dir" value="${build.dir}" />
+ <sysproperty key="clover.db.dir" value="${clover.db.dir}" />
+ <sysproperty key="java.security.manager" value="java.lang.SecurityManager" />
+ <sysproperty key="java.security.policy" value="${common.dir}/tools/junit4/tests.policy" />
<sysproperty key="lucene.version" value="${dev.version}"/>
@@ -1381,31 +1331,11 @@ ${tests-output}/junit4-*.suites - pe
<!-- <compilerarg line="-Xmaxwarns 10000000"/>
<compilerarg line="-Xmaxerrs 10000000"/> -->
<!-- for generics in Java 1.5: -->
- <compilerarg line="${javac.args}"/>
+ <compilerarg line="${javac.args}"/>
</javac>
</sequential>
</macrodef>
- <macrodef name="invoke-javacc">
- <attribute name="target"/>
- <attribute name="outputDir"/>
- <sequential>
- <mkdir dir="@{outputDir}"/>
- <javacc
- target="@{target}"
- outputDirectory="@{outputDir}"
- debugTokenManager="${javacc.debug.tokenmgr}"
- debugParser="${javacc.debug.parser}"
- debuglookahead="${javacc.debug.lookahead}"
- javacchome="${javacc.home}"
- jdkversion="${javac.source}"
- />
- <fixcrlf srcdir="@{outputDir}" includes="*.java" encoding="UTF-8">
- <containsregexp expression="Generated.*By.*JavaCC"/>
- </fixcrlf>
- </sequential>
- </macrodef>
-
<property name="failonjavadocwarning" value="true"/>
<macrodef name="invoke-javadoc">
<element name="sources" optional="yes"/>
@@ -1547,10 +1477,10 @@ ${tests-output}/junit4-*.suites - pe
description="Populates properties svn.URL and svn.Revision using 'svn info'.">
<attribute name="directory"/>
<sequential>
- <exec dir="." executable="${svnversion.exe}" outputproperty="svn.ver"/>
+ <exec dir="@{directory}" executable="${svnversion.exe}" outputproperty="svn.ver"/>
<fail message="A subversion checkout is required for this target">
<condition>
- <equals arg1="${svn.ver}" arg2="exported"/>
+ <matches pattern="(exported|unversioned.*)" string="${svn.ver}" casesensitive="false"/>
</condition>
</fail>
<exec dir="@{directory}" executable="${svn.exe}" outputproperty="svn.info" failonerror="true">
@@ -1697,7 +1627,7 @@ ${tests-output}/junit4-*.suites - pe
<element name="nested" optional="false" implicit="true"/>
<sequential>
<copy todir="@{todir}" flatten="@{flatten}" overwrite="@{overwrite}" verbose="true"
- preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8"
+ preservelastmodified="false" encoding="UTF-8" outputencoding="UTF-8" taskname="pegdown"
>
<filterchain>
<tokenfilter>
@@ -1757,4 +1687,22 @@ ${tests-output}/junit4-*.suites - pe
</sequential>
</macrodef>
+ <!--
+ compile changes.txt into an html file
+ -->
+ <macrodef name="build-changes">
+ <attribute name="changes.src.dir" default="${changes.src.dir}"/>
+ <attribute name="changes.target.dir" default="${changes.target.dir}"/>
+ <sequential>
+ <mkdir dir="@{changes.target.dir}"/>
+ <exec executable="perl" input="CHANGES.txt" output="@{changes.target.dir}/Changes.html"
+ failonerror="true" logError="true">
+ <arg value="@{changes.src.dir}/changes2html.pl"/>
+ </exec>
+ <copy todir="@{changes.target.dir}">
+ <fileset dir="@{changes.src.dir}" includes="*.css"/>
+ </copy>
+ </sequential>
+ </macrodef>
+
</project>
Modified: lucene/dev/branches/lucene3312/lucene/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/build.xml?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/build.xml (original)
+++ lucene/dev/branches/lucene3312/lucene/core/build.xml Thu Aug 30 22:43:41 2012
@@ -38,8 +38,6 @@
<pathelement path="${java.class.path}"/>
</path>
- <target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
-
<target name="test-core" depends="common.test"/>
<target name="javadocs-core" depends="javadocs"/>
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java Thu Aug 30 22:43:41 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.CloseableThreadLocal;
+import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
@@ -67,14 +68,26 @@ import java.util.Map;
* Analysis integration with Apache UIMA.
* </ul>
*/
-public abstract class Analyzer {
+public abstract class Analyzer implements Closeable {
private final ReuseStrategy reuseStrategy;
+ /**
+ * Create a new Analyzer, reusing the same set of components per-thread
+ * across calls to {@link #tokenStream(String, Reader)}.
+ */
public Analyzer() {
this(new GlobalReuseStrategy());
}
+ /**
+ * Expert: create a new Analyzer with a custom {@link ReuseStrategy}.
+ * <p>
+ * NOTE: if you just want to reuse on a per-field basis, it's easier to
+ * use a subclass of {@link AnalyzerWrapper} such as
+ * <a href="{@docRoot}/../analyzers-common/org/apache/lucene/analysis/miscellaneous/PerFieldAnalyzerWrapper.html">
+ * PerFieldAnalyzerWrapper</a> instead.
+ */
public Analyzer(ReuseStrategy reuseStrategy) {
this.reuseStrategy = reuseStrategy;
}
@@ -93,20 +106,25 @@ public abstract class Analyzer {
Reader reader);
/**
- * Creates a TokenStream that is allowed to be re-use from the previous time
- * that the same thread called this method. Callers that do not need to use
- * more than one TokenStream at the same time from this analyzer should use
- * this method for better performance.
+ * Returns a TokenStream suitable for <code>fieldName</code>, tokenizing
+ * the contents of <code>reader</code>.
* <p>
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents}. It returns the sink of the
* components and stores the components internally. Subsequent calls to this
* method will reuse the previously stored components after resetting them
* through {@link TokenStreamComponents#setReader(Reader)}.
- * </p>
+ * <p>
+ * <b>NOTE:</b> After calling this method, the consumer must follow the
+ * workflow described in {@link TokenStream} to properly consume its contents.
+ * See the {@link org.apache.lucene.analysis Analysis package documentation} for
+ * some examples demonstrating this.
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
+ * @return TokenStream for iterating the analyzed content of <code>reader</code>
+ * @throws AlreadyClosedException if the Analyzer is closed.
+ * @throws IOException if an i/o error occurs.
*/
public final TokenStream tokenStream(final String fieldName,
final Reader reader) throws IOException {
@@ -123,6 +141,13 @@ public abstract class Analyzer {
/**
* Override this if you want to add a CharFilter chain.
+ * <p>
+ * The default implementation returns <code>reader</code>
+ * unchanged.
+ *
+ * @param fieldName IndexableField name being indexed
+ * @param reader original Reader
+ * @return reader, optionally decorated with CharFilter(s)
*/
protected Reader initReader(String fieldName, Reader reader) {
return reader;
@@ -139,7 +164,8 @@ public abstract class Analyzer {
* exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
*
* @param fieldName IndexableField name being indexed.
- * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+ * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}.
+ * This value must be {@code >= 0}.
*/
public int getPositionIncrementGap(String fieldName) {
return 0;
@@ -152,7 +178,8 @@ public abstract class Analyzer {
* produced at least one token for indexing.
*
* @param fieldName the field just indexed
- * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+ * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}.
+ * This value must be {@code >= 0}.
*/
public int getOffsetGap(String fieldName) {
return 1;
@@ -171,7 +198,14 @@ public abstract class Analyzer {
* {@link Analyzer#tokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
+ /**
+ * Original source of the tokens.
+ */
protected final Tokenizer source;
+ /**
+ * Sink tokenstream, such as the outer tokenfilter decorating
+ * the chain. This can be the source if there are no filters.
+ */
protected final TokenStream sink;
/**
@@ -235,10 +269,13 @@ public abstract class Analyzer {
* Strategy defining how TokenStreamComponents are reused per call to
* {@link Analyzer#tokenStream(String, java.io.Reader)}.
*/
- public static abstract class ReuseStrategy {
+ public static abstract class ReuseStrategy implements Closeable {
private CloseableThreadLocal<Object> storedValue = new CloseableThreadLocal<Object>();
+ /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
+ public ReuseStrategy() {}
+
/**
* Gets the reusable TokenStreamComponents for the field with the given name
*
@@ -262,6 +299,7 @@ public abstract class Analyzer {
* Returns the currently stored value
*
* @return Currently stored value or {@code null} if no value is stored
+ * @throws AlreadyClosedException if the ReuseStrategy is closed.
*/
protected final Object getStoredValue() {
try {
@@ -279,6 +317,7 @@ public abstract class Analyzer {
* Sets the stored value
*
* @param storedValue Value to store
+ * @throws AlreadyClosedException if the ReuseStrategy is closed.
*/
protected final void setStoredValue(Object storedValue) {
try {
@@ -296,8 +335,10 @@ public abstract class Analyzer {
* Closes the ReuseStrategy, freeing any resources
*/
public void close() {
- storedValue.close();
- storedValue = null;
+ if (storedValue != null) {
+ storedValue.close();
+ storedValue = null;
+ }
}
}
@@ -306,17 +347,16 @@ public abstract class Analyzer {
* every field.
*/
public final static class GlobalReuseStrategy extends ReuseStrategy {
+
+ /** Creates a new instance, with empty per-thread values */
+ public GlobalReuseStrategy() {}
- /**
- * {@inheritDoc}
- */
+ @Override
public TokenStreamComponents getReusableComponents(String fieldName) {
return (TokenStreamComponents) getStoredValue();
}
- /**
- * {@inheritDoc}
- */
+ @Override
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
setStoredValue(components);
}
@@ -328,19 +368,18 @@ public abstract class Analyzer {
*/
public static class PerFieldReuseStrategy extends ReuseStrategy {
- /**
- * {@inheritDoc}
- */
+ /** Creates a new instance, with empty per-thread-per-field values */
+ public PerFieldReuseStrategy() {}
+
@SuppressWarnings("unchecked")
+ @Override
public TokenStreamComponents getReusableComponents(String fieldName) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
return componentsPerField != null ? componentsPerField.get(fieldName) : null;
}
- /**
- * {@inheritDoc}
- */
@SuppressWarnings("unchecked")
+ @Override
public void setReusableComponents(String fieldName, TokenStreamComponents components) {
Map<String, TokenStreamComponents> componentsPerField = (Map<String, TokenStreamComponents>) getStoredValue();
if (componentsPerField == null) {
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/AnalyzerWrapper.java Thu Aug 30 22:43:41 2012
@@ -61,25 +61,16 @@ public abstract class AnalyzerWrapper ex
*/
protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components);
- /**
- * {@inheritDoc}
- */
@Override
protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) {
return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader));
}
- /**
- * {@inheritDoc}
- */
@Override
public final int getPositionIncrementGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
- /**
- * {@inheritDoc}
- */
@Override
public final int getOffsetGap(String fieldName) {
return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName);
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CachingTokenFilter.java Thu Aug 30 22:43:41 2012
@@ -38,6 +38,11 @@ public final class CachingTokenFilter ex
private Iterator<AttributeSource.State> iterator = null;
private AttributeSource.State finalState;
+ /**
+ * Create a new CachingTokenFilter around <code>input</code>,
+ * caching its token attributes, which can be replayed again
+ * after a call to {@link #reset()}.
+ */
public CachingTokenFilter(TokenStream input) {
super(input);
}
@@ -67,6 +72,13 @@ public final class CachingTokenFilter ex
}
}
+ /**
+ * Rewinds the iterator to the beginning of the cached list.
+ * <p>
+ * Note that this does not call reset() on the wrapped tokenstream ever, even
+ * the first time. You should reset() the inner tokenstream before wrapping
+ * it with CachingTokenFilter.
+ */
@Override
public void reset() {
if(cache != null) {
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/CharFilter.java Thu Aug 30 22:43:41 2012
@@ -33,6 +33,9 @@ import java.io.Reader;
* You can optionally provide more efficient implementations of additional methods
* like {@link #read()}, {@link #read(char[])}, {@link #read(java.nio.CharBuffer)},
* but this is not required.
+ * <p>
+ * For examples and integration with {@link Analyzer}, see the
+ * {@link org.apache.lucene.analysis Analysis package documentation}.
*/
// the way java.io.FilterReader should work!
public abstract class CharFilter extends Reader {
@@ -52,6 +55,10 @@ public abstract class CharFilter extends
/**
* Closes the underlying input stream.
+ * <p>
+ * <b>NOTE:</b>
+ * The default implementation closes the input Reader, so
+ * be sure to call <code>super.close()</code> when overriding this method.
*/
@Override
public void close() throws IOException {
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/NumericTokenStream.java Thu Aug 30 22:43:41 2012
@@ -144,6 +144,12 @@ public final class NumericTokenStream ex
private long value = 0L;
private int valueSize = 0, shift = 0, precisionStep = 0;
private BytesRef bytes = new BytesRef();
+
+ /**
+ * Creates, but does not yet initialize this attribute instance
+ * @see #init(long, int, int, int)
+ */
+ public NumericTermAttributeImpl() {}
public BytesRef getBytesRef() {
return bytes;
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Token.java Thu Aug 30 22:43:41 2012
@@ -176,8 +176,8 @@ public class Token extends CharTermAttri
* instead use the char[] termBuffer methods to set the
* term text.
* @param text term text
- * @param start start offset
- * @param end end offset
+ * @param start start offset in the source text
+ * @param end end offset in the source text
*/
public Token(String text, int start, int end) {
checkOffsets(start, end);
@@ -191,8 +191,8 @@ public class Token extends CharTermAttri
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
- * @param start start offset
- * @param end end offset
+ * @param start start offset in the source text
+ * @param end end offset in the source text
* @param typ token type
*/
public Token(String text, int start, int end, String typ) {
@@ -208,9 +208,9 @@ public class Token extends CharTermAttri
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
- * @param text
- * @param start
- * @param end
+ * @param text term text
+ * @param start start offset in the source text
+ * @param end end offset in the source text
* @param flags token type bits
*/
public Token(String text, int start, int end, int flags) {
@@ -225,11 +225,11 @@ public class Token extends CharTermAttri
* Constructs a Token with the given term buffer (offset
* & length), start and end
* offsets
- * @param startTermBuffer
- * @param termBufferOffset
- * @param termBufferLength
- * @param start
- * @param end
+ * @param startTermBuffer buffer containing term text
+ * @param termBufferOffset the index in the buffer of the first character
+ * @param termBufferLength number of valid characters in the buffer
+ * @param start start offset in the source text
+ * @param end end offset in the source text
*/
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
checkOffsets(start, end);
@@ -238,31 +238,9 @@ public class Token extends CharTermAttri
endOffset = end;
}
- /** Set the position increment. This determines the position of this token
- * relative to the previous Token in a {@link TokenStream}, used in phrase
- * searching.
- *
- * <p>The default value is one.
- *
- * <p>Some common uses for this are:<ul>
- *
- * <li>Set it to zero to put multiple terms in the same position. This is
- * useful if, e.g., a word has multiple stems. Searches for phrases
- * including either stem will match. In this case, all but the first stem's
- * increment should be set to zero: the increment of the first instance
- * should be one. Repeating a token with an increment of zero can also be
- * used to boost the scores of matches on that token.
- *
- * <li>Set it to values greater than one to inhibit exact phrase matches.
- * If, for example, one does not want phrases to match across removed stop
- * words, then one could build a stop word filter that removes stop words and
- * also sets the increment to the number of stop words removed before each
- * non-stop word. Then exact phrase queries will only match when the terms
- * occur with no intervening stop words.
- *
- * </ul>
- * @param positionIncrement the distance from the prior term
- * @see org.apache.lucene.index.DocsAndPositionsEnum
+ /**
+ * {@inheritDoc}
+ * @see PositionIncrementAttribute
*/
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
@@ -271,93 +249,101 @@ public class Token extends CharTermAttri
this.positionIncrement = positionIncrement;
}
- /** Returns the position increment of this Token.
- * @see #setPositionIncrement
+ /**
+ * {@inheritDoc}
+ * @see PositionIncrementAttribute
*/
public int getPositionIncrement() {
return positionIncrement;
}
- /** Set the position length.
- * @see PositionLengthAttribute */
+ /**
+ * {@inheritDoc}
+ * @see PositionLengthAttribute
+ */
@Override
public void setPositionLength(int positionLength) {
this.positionLength = positionLength;
}
- /** Get the position length.
- * @see PositionLengthAttribute */
+ /**
+ * {@inheritDoc}
+ * @see PositionLengthAttribute
+ */
@Override
public int getPositionLength() {
return positionLength;
}
- /** Returns this Token's starting offset, the position of the first character
- corresponding to this token in the source text.
-
- Note that the difference between endOffset() and startOffset() may not be
- equal to {@link #length}, as the term text may have been altered by a
- stemmer or some other filter. */
+ /**
+ * {@inheritDoc}
+ * @see OffsetAttribute
+ */
public final int startOffset() {
return startOffset;
}
- /** Returns this Token's ending offset, one greater than the position of the
- last character corresponding to this token in the source text. The length
- of the token in the source text is (endOffset - startOffset). */
+ /**
+ * {@inheritDoc}
+ * @see OffsetAttribute
+ */
public final int endOffset() {
return endOffset;
}
- /** Set the starting and ending offset.
- @see #startOffset() and #endOffset()*/
+ /**
+ * {@inheritDoc}
+ * @see OffsetAttribute
+ */
public void setOffset(int startOffset, int endOffset) {
checkOffsets(startOffset, endOffset);
this.startOffset = startOffset;
this.endOffset = endOffset;
}
- /** Returns this Token's lexical type. Defaults to "word". */
+ /**
+ * {@inheritDoc}
+ * @see TypeAttribute
+ */
public final String type() {
return type;
}
- /** Set the lexical type.
- @see #type() */
+ /**
+ * {@inheritDoc}
+ * @see TypeAttribute
+ */
public final void setType(String type) {
this.type = type;
}
/**
- * <p/>
- *
- * Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although they do share similar purposes.
- * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
- *
- *
- * @return The bits
- * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
+ * {@inheritDoc}
+ * @see FlagsAttribute
*/
public int getFlags() {
return flags;
}
/**
- * @see #getFlags()
+ * {@inheritDoc}
+ * @see FlagsAttribute
*/
public void setFlags(int flags) {
this.flags = flags;
}
/**
- * Returns this Token's payload.
- */
+ * {@inheritDoc}
+ * @see PayloadAttribute
+ */
public BytesRef getPayload() {
return this.payload;
}
- /**
- * Sets this Token's payload.
+ /**
+ * {@inheritDoc}
+ * @see PayloadAttribute
*/
public void setPayload(BytesRef payload) {
this.payload = payload;
@@ -551,8 +537,8 @@ public class Token extends CharTermAttri
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
- * @param prototype
- * @param newTerm
+ * @param prototype existing Token
+ * @param newTerm new term text
*/
public void reinit(Token prototype, String newTerm) {
setEmpty().append(newTerm);
@@ -566,10 +552,10 @@ public class Token extends CharTermAttri
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
- * @param prototype
- * @param newTermBuffer
- * @param offset
- * @param length
+ * @param prototype existing Token
+ * @param newTermBuffer buffer containing new term text
+ * @param offset the index in the buffer of the first character
+ * @param length number of valid characters in the buffer
*/
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
copyBuffer(newTermBuffer, offset, length);
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenFilter.java Thu Aug 30 22:43:41 2012
@@ -34,21 +34,37 @@ public abstract class TokenFilter extend
this.input = input;
}
- /** Performs end-of-stream operations, if any, and calls then <code>end()</code> on the
- * input TokenStream.<p/>
- * <b>NOTE:</b> Be sure to call <code>super.end()</code> first when overriding this method.*/
+ /**
+ * {@inheritDoc}
+ * <p>
+ * <b>NOTE:</b>
+ * The default implementation chains the call to the input TokenStream, so
+ * be sure to call <code>super.end()</code> first when overriding this method.
+ */
@Override
public void end() throws IOException {
input.end();
}
- /** Close the input TokenStream. */
+ /**
+ * {@inheritDoc}
+ * <p>
+ * <b>NOTE:</b>
+ * The default implementation chains the call to the input TokenStream, so
+ * be sure to call <code>super.close()</code> when overriding this method.
+ */
@Override
public void close() throws IOException {
input.close();
}
- /** Reset the filter as well as the input TokenStream. */
+ /**
+ * {@inheritDoc}
+ * <p>
+ * <b>NOTE:</b>
+ * The default implementation chains the call to the input TokenStream, so
+ * be sure to call <code>super.reset()</code> when overriding this method.
+ */
@Override
public void reset() throws IOException {
input.reset();
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java Thu Aug 30 22:43:41 2012
@@ -170,12 +170,8 @@ public abstract class TokenStream extend
* This method is called by a consumer before it begins consumption using
* {@link #incrementToken()}.
* <p/>
- * Resets this stream to the beginning. As all TokenStreams must be reusable,
- * any implementations which have state that needs to be reset between usages
- * of the TokenStream, must implement this method. Note that if your TokenStream
- * caches tokens and feeds them back again after a reset, it is imperative
- * that you clone the tokens when you store them away (on the first pass) as
- * well as when you return them (on future passes after {@link #reset()}).
+ * Resets this stream to a clean state. Stateful implementations must implement
+ * this method so that they can be reused, just as if they had been created fresh.
*/
public void reset() throws IOException {}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/Tokenizer.java Thu Aug 30 22:43:41 2012
@@ -54,7 +54,13 @@ public abstract class Tokenizer extends
this.input = input;
}
- /** By default, closes the input Reader. */
+ /**
+ * {@inheritDoc}
+ * <p>
+ * <b>NOTE:</b>
+ * The default implementation closes the input Reader, so
+ * be sure to call <code>super.close()</code> when overriding this method.
+ */
@Override
public void close() throws IOException {
if (input != null) {
@@ -76,12 +82,18 @@ public abstract class Tokenizer extends
return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff;
}
- /** Expert: Reset the tokenizer to a new reader. Typically, an
+ /** Expert: Set a new reader on the Tokenizer. Typically, an
* analyzer (in its tokenStream method) will use
* this to re-use a previously created tokenizer. */
- public void setReader(Reader input) throws IOException {
+ public final void setReader(Reader input) throws IOException {
assert input != null: "input must not be null";
this.input = input;
+ assert setReaderTestPoint();
+ }
+
+ // only used by assert, for testing
+ boolean setReaderTestPoint() {
+ return true;
}
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/package.html Thu Aug 30 22:43:41 2012
@@ -817,5 +817,30 @@ As a small hint, this is how the new Att
...
</pre>
+<h4>Adding a CharFilter chain</h4>
+Analyzers take Java {@link java.io.Reader}s as input. Of course you can wrap your Readers with {@link java.io.FilterReader}s
+to manipulate content, but this would have the big disadvantage that character offsets might be inconsistent with your original
+text.
+<p>
+{@link org.apache.lucene.analysis.CharFilter} is designed to allow you to pre-process input like a FilterReader would, but also
+preserve the original offsets associated with those characters. This way mechanisms like highlighting still work correctly.
+CharFilters can be chained.
+<p>
+Example:
+<pre class="prettyprint">
+public class MyAnalyzer extends Analyzer {
+
+ {@literal @Override}
+ protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ return new TokenStreamComponents(new MyTokenizer(reader));
+ }
+
+ {@literal @Override}
+ protected Reader initReader(String fieldName, Reader reader) {
+ // wrap the Reader in a CharFilter chain.
+ return new SecondCharFilter(new FirstCharFilter(reader));
+ }
+}
+</pre>
</body>
</html>
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttribute.java Thu Aug 30 22:43:41 2012
@@ -38,7 +38,11 @@ public interface CharTermAttribute exten
* #resizeBuffer(int)} to increase it. After
* altering the buffer be sure to call {@link
* #setLength} to record the number of valid
- * characters that were placed into the termBuffer. */
+ * characters that were placed into the termBuffer.
+ * <p>
+ * <b>NOTE</b>: The returned buffer may be larger than
+ * the valid {@link #length()}.
+ */
public char[] buffer();
/** Grows the termBuffer to at least size newSize, preserving the
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -26,14 +26,15 @@ import org.apache.lucene.util.AttributeR
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
-/**
- * The term text of a Token.
- */
+/** Default implementation of {@link CharTermAttribute}. */
public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttribute, TermToBytesRefAttribute, Cloneable {
private static int MIN_BUFFER_SIZE = 10;
private char[] termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
private int termLength = 0;
+
+ /** Initialize this attribute with empty term text */
+ public CharTermAttributeImpl() {}
public final void copyBuffer(char[] buffer, int offset, int length) {
growTermBuffer(length);
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttribute.java Thu Aug 30 22:43:41 2012
@@ -22,22 +22,23 @@ import org.apache.lucene.util.Attribute;
/**
* This attribute can be used to pass different flags down the {@link Tokenizer} chain,
- * eg from one TokenFilter to another one.
+ * e.g. from one TokenFilter to another one.
+ * <p>
+ * This is completely distinct from {@link TypeAttribute}, although they do share similar purposes.
+ * The flags can be used to encode information about the token for use by other
+ * {@link org.apache.lucene.analysis.TokenFilter}s.
* @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
*/
public interface FlagsAttribute extends Attribute {
/**
- * <p/>
- *
- * Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
- * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
- *
- *
+ * Get the bitset for any bits that have been set.
* @return The bits
+ * @see #setFlags(int)
*/
public int getFlags();
/**
+ * Set the flags to a new bitset.
* @see #getFlags()
*/
public void setFlags(int flags);
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/FlagsAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -19,30 +19,17 @@ package org.apache.lucene.analysis.token
import org.apache.lucene.util.AttributeImpl;
-/**
- * This attribute can be used to pass different flags down the tokenizer chain,
- * eg from one TokenFilter to another one.
- * @lucene.experimental While we think this is here to stay, we may want to change it to be a long.
- */
+/** Default implementation of {@link FlagsAttribute}. */
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable {
private int flags = 0;
- /**
- * <p/>
- *
- * Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
- * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
- *
- *
- * @return The bits
- */
+ /** Initialize this attribute with no bits set */
+ public FlagsAttributeImpl() {}
+
public int getFlags() {
return flags;
}
- /**
- * @see #getFlags()
- */
public void setFlags(int flags) {
this.flags = flags;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttribute.java Thu Aug 30 22:43:41 2012
@@ -30,20 +30,22 @@ import org.apache.lucene.util.Attribute;
public interface KeywordAttribute extends Attribute {
/**
- * Returns <code>true</code> iff the current token is a keyword, otherwise
- * <code>false</code>/
+ * Returns <code>true</code> if the current token is a keyword, otherwise
+ * <code>false</code>
*
- * @return <code>true</code> iff the current token is a keyword, otherwise
- * <code>false</code>/
+ * @return <code>true</code> if the current token is a keyword, otherwise
+ * <code>false</code>
+ * @see #setKeyword(boolean)
*/
public boolean isKeyword();
/**
- * Marks the current token as keyword iff set to <code>true</code>.
+ * Marks the current token as keyword if set to <code>true</code>.
*
* @param isKeyword
- * <code>true</code> iff the current token is a keyword, otherwise
+ * <code>true</code> if the current token is a keyword, otherwise
* <code>false</code>.
+ * @see #isKeyword()
*/
public void setKeyword(boolean isKeyword);
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/KeywordAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -17,19 +17,15 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeImpl;
-/**
- *This attribute can be used to mark a token as a keyword. Keyword aware
- * {@link TokenStream}s can decide to modify a token based on the return value
- * of {@link #isKeyword()} if the token is modified. Stemming filters for
- * instance can use this attribute to conditionally skip a term if
- * {@link #isKeyword()} returns <code>true</code>.
- */
+/** Default implementation of {@link KeywordAttribute}. */
public final class KeywordAttributeImpl extends AttributeImpl implements
KeywordAttribute {
private boolean keyword;
+
+ /** Initialize this attribute with the keyword value as false. */
+ public KeywordAttributeImpl() {}
@Override
public void clear() {
@@ -57,24 +53,10 @@ public final class KeywordAttributeImpl
return keyword == other.keyword;
}
- /**
- * Returns <code>true</code> iff the current token is a keyword, otherwise
- * <code>false</code>/
- *
- * @return <code>true</code> iff the current token is a keyword, otherwise
- * <code>false</code>/
- */
public boolean isKeyword() {
return keyword;
}
- /**
- * Marks the current token as keyword iff set to <code>true</code>.
- *
- * @param isKeyword
- * <code>true</code> iff the current token is a keyword, otherwise
- * <code>false</code>.
- */
public void setKeyword(boolean isKeyword) {
keyword = isKeyword;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttribute.java Thu Aug 30 22:43:41 2012
@@ -23,22 +23,34 @@ import org.apache.lucene.util.Attribute;
* The start and end character offset of a Token.
*/
public interface OffsetAttribute extends Attribute {
- /** Returns this Token's starting offset, the position of the first character
- corresponding to this token in the source text.
-
- Note that the difference between endOffset() and startOffset() may not be
- equal to termText.length(), as the term text may have been altered by a
- stemmer or some other filter. */
+ /**
+ * Returns this Token's starting offset, the position of the first character
+ * corresponding to this token in the source text.
+ * <p>
+ * Note that the difference between {@link #endOffset()} and <code>startOffset()</code>
+ * may not be equal to termText.length(), as the term text may have been altered by a
+ * stemmer or some other filter.
+ * @see #setOffset(int, int)
+ */
public int startOffset();
- /** Set the starting and ending offset.
- @see #startOffset() and #endOffset()*/
+ /**
+ * Set the starting and ending offset.
+ * @throws IllegalArgumentException If <code>startOffset</code> or <code>endOffset</code>
+ * are negative, or if <code>startOffset</code> is greater than
+ * <code>endOffset</code>
+ * @see #startOffset()
+ * @see #endOffset()
+ */
public void setOffset(int startOffset, int endOffset);
- /** Returns this Token's ending offset, one greater than the position of the
- last character corresponding to this token in the source text. The length
- of the token in the source text is (endOffset - startOffset). */
+ /**
+ * Returns this Token's ending offset, one greater than the position of the
+ * last character corresponding to this token in the source text. The length
+ * of the token in the source text is (<code>endOffset()</code> - {@link #startOffset()}).
+ * @see #setOffset(int, int)
+ */
public int endOffset();
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/OffsetAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -19,26 +19,18 @@ package org.apache.lucene.analysis.token
import org.apache.lucene.util.AttributeImpl;
-/**
- * The start and end character offset of a Token.
- */
+/** Default implementation of {@link OffsetAttribute}. */
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable {
private int startOffset;
private int endOffset;
+
+ /** Initialize this attribute with startOffset and endOffset of 0. */
+ public OffsetAttributeImpl() {}
- /** Returns this Token's starting offset, the position of the first character
- corresponding to this token in the source text.
-
- Note that the difference between endOffset() and startOffset() may not be
- equal to termText.length(), as the term text may have been altered by a
- stemmer or some other filter. */
public int startOffset() {
return startOffset;
}
-
- /** Set the starting and ending offset.
- @see #startOffset() and #endOffset()*/
public void setOffset(int startOffset, int endOffset) {
// TODO: we could assert that this is set-once, ie,
@@ -56,10 +48,6 @@ public class OffsetAttributeImpl extends
this.endOffset = endOffset;
}
-
- /** Returns this Token's ending offset, one greater than the position of the
- last character corresponding to this token in the source text. The length
- of the token in the source text is (endOffset - startOffset). */
public int endOffset() {
return endOffset;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttribute.java Thu Aug 30 22:43:41 2012
@@ -17,20 +17,34 @@ package org.apache.lucene.analysis.token
* limitations under the License.
*/
+import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.BytesRef;
/**
- * The payload of a Token.
+ * The payload of a Token.
+ * <p>
+ * The payload is stored in the index at each position, and can
+ * be used to influence scoring when using Payload-based queries
+ * in the {@link org.apache.lucene.search.payloads} and
+ * {@link org.apache.lucene.search.spans} packages.
+ * <p>
+ * NOTE: because the payload will be stored at each position, it's usually
+ * best to use the minimum number of bytes necessary. Some codec implementations
+ * may optimize payload storage when all payloads have the same length.
+ *
+ * @see DocsAndPositionsEnum
*/
public interface PayloadAttribute extends Attribute {
/**
* Returns this Token's payload.
+ * @see #setPayload(BytesRef)
*/
public BytesRef getPayload();
/**
* Sets this Token's payload.
+ * @see #getPayload()
*/
public void setPayload(BytesRef payload);
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PayloadAttributeImpl.java Thu Aug 30 22:43:41 2012
@@ -20,9 +20,7 @@ package org.apache.lucene.analysis.token
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
-/**
- * The payload of a Token.
- */
+/** Default implementation of {@link PayloadAttribute}. */
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable {
private BytesRef payload;
@@ -38,16 +36,10 @@ public class PayloadAttributeImpl extend
this.payload = payload;
}
- /**
- * Returns this Token's payload.
- */
public BytesRef getPayload() {
return this.payload;
}
- /**
- * Sets this Token's payload.
- */
public void setPayload(BytesRef payload) {
this.payload = payload;
}
Modified: lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java?rev=1379200&r1=1379199&r2=1379200&view=diff
==============================================================================
--- lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java (original)
+++ lucene/dev/branches/lucene3312/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/PositionIncrementAttribute.java Thu Aug 30 22:43:41 2012
@@ -49,11 +49,14 @@ public interface PositionIncrementAttrib
/** Set the position increment. The default value is one.
*
* @param positionIncrement the distance from the prior term
+ * @throws IllegalArgumentException if <code>positionIncrement</code>
+ * is negative.
+ * @see #getPositionIncrement()
*/
public void setPositionIncrement(int positionIncrement);
/** Returns the position increment of this Token.
- * @see #setPositionIncrement
+ * @see #setPositionIncrement(int)
*/
public int getPositionIncrement();
}