You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2013/01/18 19:31:23 UTC
svn commit: r1435287 [4/41] - in /lucene/dev/branches/LUCENE-2878: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/libraries/
dev-tools/idea/lucene/analysis/icu/ dev-tools/maven/
dev-tools/maven/lucene/benchmark/ dev-tools/maven/solr/ dev-tools/m...
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Fri Jan 18 18:30:54 2013
@@ -843,6 +843,7 @@ public final class StandardTokenizerImpl
public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+ @Override
public final int yychar()
{
return yychar;
@@ -851,6 +852,7 @@ public final class StandardTokenizerImpl
/**
* Fills CharTermAttribute with the current token text.
*/
+ @Override
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
@@ -965,6 +967,7 @@ public final class StandardTokenizerImpl
*
* @param reader the new input stream
*/
+ @Override
public final void yyreset(java.io.Reader reader) {
zzReader = reader;
zzAtBOL = true;
@@ -1024,6 +1027,7 @@ public final class StandardTokenizerImpl
/**
* Returns the length of the matched text region.
*/
+ @Override
public final int yylength() {
return zzMarkedPos-zzStartRead;
}
@@ -1079,6 +1083,7 @@ public final class StandardTokenizerImpl
* @return the next token
* @exception java.io.IOException if any I/O-Error occurs
*/
+ @Override
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerFactory.java Fri Jan 18 18:30:54 2013
@@ -48,6 +48,7 @@ public class UAX29URLEmailTokenizerFacto
StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
}
+ @Override
public UAX29URLEmailTokenizer create(Reader input) {
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(luceneMatchVersion, input);
tokenizer.setMaxTokenLength(maxTokenLength);
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java Fri Jan 18 18:30:54 2013
@@ -4033,6 +4033,7 @@ public final class UAX29URLEmailTokenize
public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
+ @Override
public final int yychar()
{
return yychar;
@@ -4041,6 +4042,7 @@ public final class UAX29URLEmailTokenize
/**
* Fills CharTermAttribute with the current token text.
*/
+ @Override
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
@@ -4155,6 +4157,7 @@ public final class UAX29URLEmailTokenize
*
* @param reader the new input stream
*/
+ @Override
public final void yyreset(java.io.Reader reader) {
zzReader = reader;
zzAtBOL = true;
@@ -4214,6 +4217,7 @@ public final class UAX29URLEmailTokenize
/**
* Returns the length of the matched text region.
*/
+ @Override
public final int yylength() {
return zzMarkedPos-zzStartRead;
}
@@ -4269,6 +4273,7 @@ public final class UAX29URLEmailTokenize
* @return the next token
* @exception java.io.IOException if any I/O-Error occurs
*/
+ @Override
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemFilterFactory.java Fri Jan 18 18:30:54 2013
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.T
*
*/
public class SwedishLightStemFilterFactory extends TokenFilterFactory {
+ @Override
public TokenStream create(TokenStream input) {
return new SwedishLightStemFilter(input);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java Fri Jan 18 18:30:54 2013
@@ -263,7 +263,7 @@ public final class SynonymFilter extends
this.synonyms = synonyms;
this.ignoreCase = ignoreCase;
this.fst = synonyms.fst;
- this.fstReader = fst.getBytesReader(0);
+ this.fstReader = fst.getBytesReader();
if (fst == null) {
throw new IllegalArgumentException("fst must be non-null");
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilterFactory.java Fri Jan 18 18:30:54 2013
@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.util.T
*
*/
public class ThaiWordFilterFactory extends TokenFilterFactory {
+ @Override
public ThaiWordFilter create(TokenStream input) {
assureMatchVersion();
return new ThaiWordFilter(luceneMatchVersion, input);
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilterFactory.java Fri Jan 18 18:30:54 2013
@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.util.T
*
*/
public class TurkishLowerCaseFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {
+ @Override
public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayIterator.java Fri Jan 18 18:30:54 2013
@@ -59,34 +59,41 @@ public abstract class CharArrayIterator
this.limit = start + length;
}
+ @Override
public char current() {
return (index == limit) ? DONE : jreBugWorkaround(array[index]);
}
protected abstract char jreBugWorkaround(char ch);
+ @Override
public char first() {
index = start;
return current();
}
+ @Override
public int getBeginIndex() {
return 0;
}
+ @Override
public int getEndIndex() {
return length;
}
+ @Override
public int getIndex() {
return index - start;
}
+ @Override
public char last() {
index = (limit == start) ? limit : limit - 1;
return current();
}
+ @Override
public char next() {
if (++index >= limit) {
index = limit;
@@ -96,6 +103,7 @@ public abstract class CharArrayIterator
}
}
+ @Override
public char previous() {
if (--index < start) {
index = start;
@@ -105,6 +113,7 @@ public abstract class CharArrayIterator
}
}
+ @Override
public char setIndex(int position) {
if (position < getBeginIndex() || position > getEndIndex())
throw new IllegalArgumentException("Illegal Position: " + position);
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java Fri Jan 18 18:30:54 2013
@@ -421,6 +421,7 @@ public class CharArrayMap<V> extends Abs
while (pos < keys.length && keys[pos] == null) pos++;
}
+ @Override
public boolean hasNext() {
return pos < keys.length;
}
@@ -451,11 +452,13 @@ public class CharArrayMap<V> extends Abs
}
/** use nextCharArray() + currentValue() for better efficiency. */
+ @Override
public Map.Entry<Object,V> next() {
goNext();
return new MapEntry(lastPos, allowModify);
}
+ @Override
public void remove() {
throw new UnsupportedOperationException();
}
@@ -470,16 +473,19 @@ public class CharArrayMap<V> extends Abs
this.allowModify = allowModify;
}
+ @Override
public Object getKey() {
// we must clone here, as putAll to another CharArrayMap
// with other case sensitivity flag would corrupt the keys
return keys[pos].clone();
}
+ @Override
public V getValue() {
return values[pos];
}
+ @Override
public V setValue(V value) {
if (!allowModify)
throw new UnsupportedOperationException();
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java Fri Jan 18 18:30:54 2013
@@ -38,6 +38,7 @@ public class ElisionFilterFactory extend
private CharArraySet articles;
+ @Override
public void inform(ResourceLoader loader) throws IOException {
String articlesFile = args.get("articles");
boolean ignoreCase = getBoolean("ignoreCase", false);
@@ -50,6 +51,7 @@ public class ElisionFilterFactory extend
}
}
+ @Override
public ElisionFilter create(TokenStream input) {
return new ElisionFilter(input, articles);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/OpenStringBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/OpenStringBuilder.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/OpenStringBuilder.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/OpenStringBuilder.java Fri Jan 18 18:30:54 2013
@@ -45,13 +45,16 @@ public class OpenStringBuilder implement
public char[] getArray() { return buf; }
public int size() { return len; }
+ @Override
public int length() { return len; }
public int capacity() { return buf.length; }
+ @Override
public Appendable append(CharSequence csq) {
return append(csq, 0, csq.length());
}
+ @Override
public Appendable append(CharSequence csq, int start, int end) {
reserve(end-start);
for (int i=start; i<end; i++) {
@@ -60,11 +63,13 @@ public class OpenStringBuilder implement
return this;
}
+ @Override
public Appendable append(char c) {
write(c);
return this;
}
+ @Override
public char charAt(int index) {
return buf[index];
}
@@ -73,6 +78,7 @@ public class OpenStringBuilder implement
buf[index] = ch;
}
+ @Override
public CharSequence subSequence(int start, int end) {
throw new UnsupportedOperationException(); // todo
}
@@ -139,6 +145,7 @@ public class OpenStringBuilder implement
return newbuf;
}
+ @Override
public String toString() {
return new String(buf, 0, size());
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerFactory.java Fri Jan 18 18:30:54 2013
@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.wikipe
*/
public class WikipediaTokenizerFactory extends TokenizerFactory {
// TODO: add support for WikipediaTokenizer's advanced options.
+ @Override
public Tokenizer create(Reader input) {
return new WikipediaTokenizer(input);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArmenianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArmenianStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArmenianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ArmenianStemmer.java Fri Jan 18 18:30:54 2013
@@ -425,6 +425,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -502,10 +503,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof ArmenianStemmer;
}
+ @Override
public int hashCode() {
return ArmenianStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/BasqueStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/BasqueStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/BasqueStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/BasqueStemmer.java Fri Jan 18 18:30:54 2013
@@ -863,6 +863,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -925,10 +926,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof BasqueStemmer;
}
+ @Override
public int hashCode() {
return BasqueStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/CatalanStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/CatalanStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/CatalanStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/CatalanStemmer.java Fri Jan 18 18:30:54 2013
@@ -977,6 +977,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1055,10 +1056,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof CatalanStemmer;
}
+ @Override
public int hashCode() {
return CatalanStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DanishStemmer.java Fri Jan 18 18:30:54 2013
@@ -363,6 +363,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -426,10 +427,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof DanishStemmer;
}
+ @Override
public int hashCode() {
return DanishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DutchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DutchStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DutchStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/DutchStemmer.java Fri Jan 18 18:30:54 2013
@@ -789,6 +789,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -840,10 +841,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof DutchStemmer;
}
+ @Override
public int hashCode() {
return DutchStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EnglishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EnglishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EnglishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/EnglishStemmer.java Fri Jan 18 18:30:54 2013
@@ -1147,6 +1147,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1317,10 +1318,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof EnglishStemmer;
}
+ @Override
public int hashCode() {
return EnglishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FinnishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FinnishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FinnishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FinnishStemmer.java Fri Jan 18 18:30:54 2013
@@ -924,6 +924,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1037,10 +1038,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof FinnishStemmer;
}
+ @Override
public int hashCode() {
return FinnishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FrenchStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FrenchStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FrenchStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/FrenchStemmer.java Fri Jan 18 18:30:54 2013
@@ -1348,6 +1348,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1504,10 +1505,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof FrenchStemmer;
}
+ @Override
public int hashCode() {
return FrenchStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/German2Stemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/German2Stemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/German2Stemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/German2Stemmer.java Fri Jan 18 18:30:54 2013
@@ -678,6 +678,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -729,10 +730,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof German2Stemmer;
}
+ @Override
public int hashCode() {
return German2Stemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/GermanStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/GermanStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/GermanStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/GermanStemmer.java Fri Jan 18 18:30:54 2013
@@ -640,6 +640,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -691,10 +692,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof GermanStemmer;
}
+ @Override
public int hashCode() {
return GermanStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/HungarianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/HungarianStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/HungarianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/HungarianStemmer.java Fri Jan 18 18:30:54 2013
@@ -1043,6 +1043,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1161,10 +1162,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof HungarianStemmer;
}
+ @Override
public int hashCode() {
return HungarianStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/IrishStemmer.java Fri Jan 18 18:30:54 2013
@@ -510,6 +510,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -573,10 +574,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof IrishStemmer;
}
+ @Override
public int hashCode() {
return IrishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ItalianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ItalianStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ItalianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/ItalianStemmer.java Fri Jan 18 18:30:54 2013
@@ -1094,6 +1094,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1183,10 +1184,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof ItalianStemmer;
}
+ @Override
public int hashCode() {
return ItalianStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/KpStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/KpStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/KpStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/KpStemmer.java Fri Jan 18 18:30:54 2013
@@ -1856,6 +1856,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -2184,10 +2185,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof KpStemmer;
}
+ @Override
public int hashCode() {
return KpStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LovinsStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LovinsStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LovinsStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/LovinsStemmer.java Fri Jan 18 18:30:54 2013
@@ -1870,6 +1870,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1911,10 +1912,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof LovinsStemmer;
}
+ @Override
public int hashCode() {
return LovinsStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/NorwegianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/NorwegianStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/NorwegianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/NorwegianStemmer.java Fri Jan 18 18:30:54 2013
@@ -309,6 +309,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -361,10 +362,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof NorwegianStemmer;
}
+ @Override
public int hashCode() {
return NorwegianStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PorterStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PorterStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PorterStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PorterStemmer.java Fri Jan 18 18:30:54 2013
@@ -610,6 +610,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -909,10 +910,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof PorterStemmer;
}
+ @Override
public int hashCode() {
return PorterStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PortugueseStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PortugueseStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PortugueseStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/PortugueseStemmer.java Fri Jan 18 18:30:54 2013
@@ -989,6 +989,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1119,10 +1120,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof PortugueseStemmer;
}
+ @Override
public int hashCode() {
return PortugueseStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RomanianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RomanianStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RomanianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RomanianStemmer.java Fri Jan 18 18:30:54 2013
@@ -927,6 +927,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1027,10 +1028,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof RomanianStemmer;
}
+ @Override
public int hashCode() {
return RomanianStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RussianStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RussianStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RussianStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/RussianStemmer.java Fri Jan 18 18:30:54 2013
@@ -596,6 +596,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -730,10 +731,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof RussianStemmer;
}
+ @Override
public int hashCode() {
return RussianStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SpanishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SpanishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SpanishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SpanishStemmer.java Fri Jan 18 18:30:54 2013
@@ -1098,6 +1098,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -1185,10 +1186,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof SpanishStemmer;
}
+ @Override
public int hashCode() {
return SpanishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SwedishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SwedishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SwedishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/SwedishStemmer.java Fri Jan 18 18:30:54 2013
@@ -300,6 +300,7 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -352,10 +353,12 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof SwedishStemmer;
}
+ @Override
public int hashCode() {
return SwedishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/TurkishStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/TurkishStemmer.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/TurkishStemmer.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/java/org/tartarus/snowball/ext/TurkishStemmer.java Fri Jan 18 18:30:54 2013
@@ -3086,6 +3086,7 @@ private static final long serialVersionU
cursor = limit_backward; return true;
}
+ @Override
public boolean stem() {
int v_1;
int v_2;
@@ -3133,10 +3134,12 @@ private static final long serialVersionU
return true;
}
+ @Override
public boolean equals( Object o ) {
return o instanceof TurkishStemmer;
}
+ @Override
public int hashCode() {
return TurkishStemmer.class.getName().hashCode();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java Fri Jan 18 18:30:54 2013
@@ -275,9 +275,11 @@ public class TestCompoundWordTokenFilter
public void clear() {
retain = false;
}
+ @Override
public boolean getRetain() {
return retain;
}
+ @Override
public void setRetain(boolean retain) {
this.retain = retain;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java Fri Jan 18 18:30:54 2013
@@ -17,19 +17,16 @@ package org.apache.lucene.analysis.core;
* limitations under the License.
*/
-import java.lang.reflect.Modifier;
import java.io.Reader;
import java.io.StringReader;
-import java.util.ArrayList;
+import java.lang.reflect.Modifier;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.List;
-import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.EmptyTokenizer;
import org.apache.lucene.analysis.MockCharFilter;
import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.MockGraphTokenFilter;
@@ -39,10 +36,9 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ValidatingTokenFilter;
-import org.apache.lucene.analysis.core.TestRandomChains;
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
@@ -72,7 +68,6 @@ public class TestAllAnalyzersHaveFactori
MockRandomLookaheadTokenFilter.class,
MockTokenFilter.class,
MockVariableLengthPayloadFilter.class,
- EmptyTokenizer.class,
ValidatingTokenFilter.class
);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestDuelingAnalyzers.java Fri Jan 18 18:30:54 2013
@@ -47,6 +47,7 @@ import org.apache.lucene.util.automaton.
public class TestDuelingAnalyzers extends LuceneTestCase {
private CharacterRunAutomaton jvmLetter;
+ @Override
public void setUp() throws Exception {
super.setUp();
// build an automaton matching this jvm's letter definition
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Fri Jan 18 18:30:54 2013
@@ -34,6 +34,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
@@ -46,7 +47,6 @@ import org.apache.lucene.analysis.Analyz
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.EmptyTokenizer;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
import org.apache.lucene.analysis.MockTokenFilter;
@@ -67,6 +67,8 @@ import org.apache.lucene.analysis.compou
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.HunspellDictionaryTest;
import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
+import org.apache.lucene.analysis.miscellaneous.KeepWordFilter;
+import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
@@ -104,69 +106,145 @@ public class TestRandomChains extends Ba
static List<Constructor<? extends TokenFilter>> tokenfilters;
static List<Constructor<? extends CharFilter>> charfilters;
- // TODO: fix those and remove
- private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+ private static interface Predicate<T> {
+ boolean apply(T o);
+ }
+
+ private static final Predicate<Object[]> ALWAYS = new Predicate<Object[]>() {
+ public boolean apply(Object[] args) {
+ return true;
+ };
+ };
+
+ private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<Constructor<?>, Predicate<Object[]>>();
static {
- // TODO: can we promote some of these to be only
- // offsets offenders?
- Collections.<Class<?>>addAll(brokenComponents,
- // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
- EmptyTokenizer.class,
- // doesn't actual reset itself!
- CachingTokenFilter.class,
- // doesn't consume whole stream!
- LimitTokenCountFilter.class,
- // Not broken: we forcefully add this, so we shouldn't
- // also randomly pick it:
- ValidatingTokenFilter.class,
- // NOTE: these by themselves won't cause any 'basic assertions' to fail.
- // but see https://issues.apache.org/jira/browse/LUCENE-3920, if any
- // tokenfilter that combines words (e.g. shingles) comes after them,
- // this will create bogus offsets because their 'offsets go backwards',
- // causing shingle or whatever to make a single token with a
- // startOffset thats > its endOffset
- // (see LUCENE-3738 for a list of other offenders here)
- // broken!
- NGramTokenizer.class,
- // broken!
- NGramTokenFilter.class,
- // broken!
- EdgeNGramTokenizer.class,
- // broken!
- EdgeNGramTokenFilter.class,
- // broken!
- WordDelimiterFilter.class,
- // broken!
- TrimFilter.class
- );
+ try {
+ brokenConstructors.put(
+ LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class),
+ ALWAYS);
+ brokenConstructors.put(
+ LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 3;
+ return !((Boolean) args[2]); // args are broken if consumeAllTokens is false
+ }
+ });
+ for (Class<?> c : Arrays.<Class<?>>asList(
+ // TODO: can we promote some of these to be only
+ // offsets offenders?
+ // doesn't actual reset itself!
+ CachingTokenFilter.class,
+ // Not broken: we forcefully add this, so we shouldn't
+ // also randomly pick it:
+ ValidatingTokenFilter.class,
+ // NOTE: these by themselves won't cause any 'basic assertions' to fail.
+ // but see https://issues.apache.org/jira/browse/LUCENE-3920, if any
+ // tokenfilter that combines words (e.g. shingles) comes after them,
+ // this will create bogus offsets because their 'offsets go backwards',
+ // causing shingle or whatever to make a single token with a
+ // startOffset thats > its endOffset
+ // (see LUCENE-3738 for a list of other offenders here)
+ // broken!
+ NGramTokenizer.class,
+ // broken!
+ NGramTokenFilter.class,
+ // broken!
+ EdgeNGramTokenizer.class,
+ // broken!
+ EdgeNGramTokenFilter.class,
+ // broken!
+ WordDelimiterFilter.class)) {
+ for (Constructor<?> ctor : c.getConstructors()) {
+ brokenConstructors.put(ctor, ALWAYS);
+ }
+ }
+ } catch (Exception e) {
+ throw new Error(e);
+ }
}
// TODO: also fix these and remove (maybe):
- // Classes that don't produce consistent graph offsets:
- private static final Set<Class<?>> brokenOffsetsComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+ // Classes/options that don't produce consistent graph offsets:
+ private static final Map<Constructor<?>,Predicate<Object[]>> brokenOffsetsConstructors = new HashMap<Constructor<?>, Predicate<Object[]>>();
static {
- Collections.<Class<?>>addAll(brokenOffsetsComponents,
- ReversePathHierarchyTokenizer.class,
- PathHierarchyTokenizer.class,
- HyphenationCompoundWordTokenFilter.class,
- DictionaryCompoundWordTokenFilter.class,
- // TODO: corrumpts graphs (offset consistency check):
- PositionFilter.class,
- // TODO: it seems to mess up offsets!?
- WikipediaTokenizer.class,
- // TODO: doesn't handle graph inputs
- ThaiWordFilter.class,
- // TODO: doesn't handle graph inputs
- CJKBigramFilter.class,
- // TODO: doesn't handle graph inputs (or even look at positionIncrement)
- HyphenatedWordsFilter.class,
- // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
- TypeTokenFilter.class,
- // TODO: doesn't handle graph inputs
- CommonGramsQueryFilter.class
- );
+ try {
+ brokenOffsetsConstructors.put(
+ TrimFilter.class.getConstructor(TokenStream.class, boolean.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 2;
+ return (Boolean) args[1]; // args are broken if updateOffsets is true
+ }
+ });
+ brokenOffsetsConstructors.put(
+ TypeTokenFilter.class.getConstructor(boolean.class, TokenStream.class, Set.class, boolean.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 4;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ brokenOffsetsConstructors.put(
+ TypeTokenFilter.class.getConstructor(boolean.class, TokenStream.class, Set.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 3;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ brokenOffsetsConstructors.put(
+ LengthFilter.class.getConstructor(boolean.class, TokenStream.class, int.class, int.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 4;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ brokenOffsetsConstructors.put(
+ KeepWordFilter.class.getConstructor(boolean.class, TokenStream.class, CharArraySet.class),
+ new Predicate<Object[]>() {
+ @Override
+ public boolean apply(Object[] args) {
+ assert args.length == 3;
+ // LUCENE-4065: only if you pass 'false' to enablePositionIncrements!
+ return !(Boolean) args[0];
+ }
+ });
+ for (Class<?> c : Arrays.<Class<?>>asList(
+ ReversePathHierarchyTokenizer.class,
+ PathHierarchyTokenizer.class,
+ HyphenationCompoundWordTokenFilter.class,
+ DictionaryCompoundWordTokenFilter.class,
+ // TODO: corrumpts graphs (offset consistency check):
+ PositionFilter.class,
+ // TODO: it seems to mess up offsets!?
+ WikipediaTokenizer.class,
+ // TODO: doesn't handle graph inputs
+ ThaiWordFilter.class,
+ // TODO: doesn't handle graph inputs
+ CJKBigramFilter.class,
+ // TODO: doesn't handle graph inputs (or even look at positionIncrement)
+ HyphenatedWordsFilter.class,
+ // TODO: doesn't handle graph inputs
+ CommonGramsQueryFilter.class)) {
+ for (Constructor<?> ctor : c.getConstructors()) {
+ brokenOffsetsConstructors.put(ctor, ALWAYS);
+ }
+ }
+ } catch (Exception e) {
+ throw new Error(e);
+ }
}
-
+
@BeforeClass
public static void beforeClass() throws Exception {
List<Class<?>> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
@@ -179,7 +257,6 @@ public class TestRandomChains extends Ba
// don't waste time with abstract classes or deprecated known-buggy ones
Modifier.isAbstract(modifiers) || !Modifier.isPublic(modifiers)
|| c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()
- || brokenComponents.contains(c)
|| c.isAnnotationPresent(Deprecated.class)
|| !(Tokenizer.class.isAssignableFrom(c) || TokenFilter.class.isAssignableFrom(c) || CharFilter.class.isAssignableFrom(c))
) {
@@ -188,7 +265,7 @@ public class TestRandomChains extends Ba
for (final Constructor<?> ctor : c.getConstructors()) {
// don't test synthetic or deprecated ctors, they likely have known bugs:
- if (ctor.isSynthetic() || ctor.isAnnotationPresent(Deprecated.class)) {
+ if (ctor.isSynthetic() || ctor.isAnnotationPresent(Deprecated.class) || brokenConstructors.get(ctor) == ALWAYS) {
continue;
}
if (Tokenizer.class.isAssignableFrom(c)) {
@@ -682,7 +759,17 @@ public class TestRandomChains extends Ba
}
return null; // no success
}
-
+
+ private boolean broken(Constructor<?> ctor, Object[] args) {
+ final Predicate<Object[]> pred = brokenConstructors.get(ctor);
+ return pred != null && pred.apply(args);
+ }
+
+ private boolean brokenOffsets(Constructor<?> ctor, Object[] args) {
+ final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
+ return pred != null && pred.apply(args);
+ }
+
// create a new random tokenizer from classpath
private TokenizerSpec newTokenizer(Random random, Reader reader) {
TokenizerSpec spec = new TokenizerSpec();
@@ -691,11 +778,12 @@ public class TestRandomChains extends Ba
final StringBuilder descr = new StringBuilder();
final CheckThatYouDidntReadAnythingReaderWrapper wrapper = new CheckThatYouDidntReadAnythingReaderWrapper(reader);
final Object args[] = newTokenizerArgs(random, wrapper, ctor.getParameterTypes());
+ if (broken(ctor, args)) {
+ continue;
+ }
spec.tokenizer = createComponent(ctor, args, descr);
if (spec.tokenizer != null) {
- if (brokenOffsetsComponents.contains(ctor.getDeclaringClass())) {
- spec.offsetsAreCorrect = false;
- }
+ spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
spec.toString = descr.toString();
} else {
assertFalse(ctor.getDeclaringClass().getName() + " has read something in ctor but failed with UOE/IAE", wrapper.readSomething);
@@ -713,6 +801,9 @@ public class TestRandomChains extends Ba
while (true) {
final Constructor<? extends CharFilter> ctor = charfilters.get(random.nextInt(charfilters.size()));
final Object args[] = newCharFilterArgs(random, spec.reader, ctor.getParameterTypes());
+ if (broken(ctor, args)) {
+ continue;
+ }
reader = createComponent(ctor, args, descr);
if (reader != null) {
spec.reader = reader;
@@ -749,11 +840,12 @@ public class TestRandomChains extends Ba
}
final Object args[] = newFilterArgs(random, spec.stream, ctor.getParameterTypes());
+ if (broken(ctor, args)) {
+ continue;
+ }
final TokenFilter flt = createComponent(ctor, args, descr);
if (flt != null) {
- if (brokenOffsetsComponents.contains(ctor.getDeclaringClass())) {
- spec.offsetsAreCorrect = false;
- }
+ spec.offsetsAreCorrect &= !brokenOffsets(ctor, args);
spec.stream = flt;
break;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellDictionaryTest.java Fri Jan 18 18:30:54 2013
@@ -38,51 +38,63 @@ public class HunspellDictionaryTest exte
this.delegate = delegate;
}
+ @Override
public int read() throws IOException {
return delegate.read();
}
+ @Override
public int hashCode() {
return delegate.hashCode();
}
+ @Override
public int read(byte[] b) throws IOException {
return delegate.read(b);
}
+ @Override
public boolean equals(Object obj) {
return delegate.equals(obj);
}
+ @Override
public int read(byte[] b, int off, int len) throws IOException {
return delegate.read(b, off, len);
}
+ @Override
public long skip(long n) throws IOException {
return delegate.skip(n);
}
+ @Override
public String toString() {
return delegate.toString();
}
+ @Override
public int available() throws IOException {
return delegate.available();
}
+ @Override
public void close() throws IOException {
this.closed = true;
delegate.close();
}
+ @Override
public void mark(int readlimit) {
delegate.mark(readlimit);
}
+ @Override
public void reset() throws IOException {
delegate.reset();
}
+ @Override
public boolean markSupported() {
return delegate.markSupported();
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestEmptyTokenStream.java Fri Jan 18 18:30:54 2013
@@ -20,15 +20,55 @@ package org.apache.lucene.analysis.misce
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
-public class TestEmptyTokenStream extends LuceneTestCase {
+public class TestEmptyTokenStream extends BaseTokenStreamTestCase {
- public void test() throws IOException {
+ public void testConsume() throws IOException {
TokenStream ts = new EmptyTokenStream();
+ ts.reset();
assertFalse(ts.incrementToken());
+ ts.end();
+ ts.close();
+ // try again with reuse:
ts.reset();
assertFalse(ts.incrementToken());
+ ts.end();
+ ts.close();
+ }
+
+ public void testConsume2() throws IOException {
+ BaseTokenStreamTestCase.assertTokenStreamContents(new EmptyTokenStream(), new String[0]);
+ }
+
+ public void testIndexWriter_LUCENE4656() throws IOException {
+ Directory directory = newDirectory();
+ IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, null));
+
+ TokenStream ts = new EmptyTokenStream();
+ assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));
+
+ Document doc = new Document();
+ doc.add(new StringField("id", "0", Field.Store.YES));
+ doc.add(new TextField("description", ts));
+
+ // this should not fail because we have no TermToBytesRefAttribute
+ writer.addDocument(doc);
+
+ assertEquals(1, writer.numDocs());
+
+ writer.close();
+ directory.close();
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java Fri Jan 18 18:30:54 2013
@@ -29,6 +29,7 @@ public class StringMockResourceLoader im
this.text = text;
}
+ @Override
public <T> T newInstance(String cname, Class<T> expectedType) {
try {
Class<? extends T> clazz = Class.forName(cname).asSubclass(expectedType);
@@ -38,6 +39,7 @@ public class StringMockResourceLoader im
}
}
+ @Override
public InputStream openResource(String resource) throws IOException {
return new ByteArrayInputStream(text.getBytes("UTF-8"));
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/build.xml?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/build.xml (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/build.xml Fri Jan 18 18:30:54 2013
@@ -26,7 +26,7 @@
<import file="../analysis-module-build.xml"/>
<path id="icujar">
- <pathelement location="lib/icu4j-49.1.jar"/>
+ <fileset dir="lib"/>
</path>
<path id="classpath">
@@ -35,6 +35,11 @@
<path refid="base.classpath"/>
</path>
+ <path id="test.classpath">
+ <path refid="test.base.classpath" />
+ <pathelement path="src/test-files" />
+ </path>
+
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
<property name="utr30.data.dir" location="src/data/utr30"/>
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUFoldingFilterFactory.java Fri Jan 18 18:30:54 2013
@@ -34,6 +34,7 @@ public class ICUFoldingFilterFactory ext
return new ICUFoldingFilter(input);
}
+ @Override
public AbstractAnalysisFactory getMultiTermComponent() {
return this;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2FilterFactory.java Fri Jan 18 18:30:54 2013
@@ -79,10 +79,12 @@ public class ICUNormalizer2FilterFactory
}
}
+ @Override
public TokenStream create(TokenStream input) {
return new ICUNormalizer2Filter(input, normalizer);
}
+ @Override
public AbstractAnalysisFactory getMultiTermComponent() {
return this;
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilter.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilter.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilter.java Fri Jan 18 18:30:54 2013
@@ -124,32 +124,39 @@ public final class ICUTransformFilter ex
this.length = token.length();
}
+ @Override
public int char32At(int pos) {
return UTF16.charAt(buffer, 0, length, pos);
}
+ @Override
public char charAt(int pos) {
return buffer[pos];
}
+ @Override
public void copy(int start, int limit, int dest) {
char text[] = new char[limit - start];
getChars(start, limit, text, 0);
replace(dest, dest, text, 0, limit - start);
}
+ @Override
public void getChars(int srcStart, int srcLimit, char[] dst, int dstStart) {
System.arraycopy(buffer, srcStart, dst, dstStart, srcLimit - srcStart);
}
+ @Override
public boolean hasMetaData() {
return false;
}
+ @Override
public int length() {
return length;
}
+ @Override
public void replace(int start, int limit, String text) {
final int charsLen = text.length();
final int newLength = shiftForReplace(start, limit, charsLen);
@@ -158,6 +165,7 @@ public final class ICUTransformFilter ex
token.setLength(length = newLength);
}
+ @Override
public void replace(int start, int limit, char[] text, int charsStart,
int charsLen) {
// shift text if necessary for the replacement
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUTransformFilterFactory.java Fri Jan 18 18:30:54 2013
@@ -64,6 +64,7 @@ public class ICUTransformFilterFactory e
transliterator = Transliterator.getInstance(id, dir);
}
+ @Override
public TokenStream create(TokenStream input) {
return new ICUTransformFilter(input, transliterator);
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CharArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CharArrayIterator.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CharArrayIterator.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/CharArrayIterator.java Fri Jan 18 18:30:54 2013
@@ -57,32 +57,39 @@ final class CharArrayIterator implements
this.limit = start + length;
}
+ @Override
public char current() {
return (index == limit) ? DONE : array[index];
}
+ @Override
public char first() {
index = start;
return current();
}
+ @Override
public int getBeginIndex() {
return 0;
}
+ @Override
public int getEndIndex() {
return length;
}
+ @Override
public int getIndex() {
return index - start;
}
+ @Override
public char last() {
index = (limit == start) ? limit : limit - 1;
return current();
}
+ @Override
public char next() {
if (++index >= limit) {
index = limit;
@@ -92,6 +99,7 @@ final class CharArrayIterator implements
}
}
+ @Override
public char previous() {
if (--index < start) {
index = start;
@@ -101,6 +109,7 @@ final class CharArrayIterator implements
}
}
+ @Override
public char setIndex(int position) {
if (position < getBeginIndex() || position > getEndIndex())
throw new IllegalArgumentException("Illegal Position: " + position);
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/segmentation/ICUTokenizerFactory.java Fri Jan 18 18:30:54 2013
@@ -17,22 +17,135 @@ package org.apache.lucene.analysis.icu.s
* limitations under the License.
*/
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory; // javadocs
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.IOUtils;
-/** Factory for {@link ICUTokenizer} */
-public class ICUTokenizerFactory extends TokenizerFactory {
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
+import com.ibm.icu.lang.UScript;
+import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.RuleBasedBreakIterator;
+
+/**
+ * Factory for {@link ICUTokenizer}.
+ * Words are broken across script boundaries, then segmented according to
+ * the BreakIterator and typing provided by the {@link DefaultICUTokenizerConfig}.
+ *
+ * <p/>
+ *
+ * To use the default set of per-script rules:
+ *
+ * <pre class="prettyprint" >
+ * &lt;fieldType name="text_icu" class="solr.TextField" positionIncrementGap="100"&gt;
+ * &lt;analyzer&gt;
+ * &lt;tokenizer class="solr.ICUTokenizerFactory"/&gt;
+ * &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * <p/>
+ *
+ * You can customize this tokenizer's behavior by specifying per-script rule files,
+ * which are compiled by the ICU RuleBasedBreakIterator. See the
+ * <a href="http://userguide.icu-project.org/boundaryanalysis#TOC-RBBI-Rules"
+ * >ICU RuleBasedBreakIterator syntax reference</a>.
+ *
+ * To add per-script rules, add a "rulefiles" argument, which should contain a
+ * comma-separated list of <tt>code:rulefile</tt> pairs in the following format:
+ * <a href="http://unicode.org/iso15924/iso15924-codes.html"
+ * >four-letter ISO 15924 script code</a>, followed by a colon, then a resource
+ * path. E.g. to specify rules for Latin (script code "Latn") and Cyrillic
+ * (script code "Cyrl"):
+ *
+ * <pre class="prettyprint" >
+ * &lt;fieldType name="text_icu_custom" class="solr.TextField" positionIncrementGap="100"&gt;
+ * &lt;analyzer&gt;
+ * &lt;tokenizer class="solr.ICUTokenizerFactory"
+ * rulefiles="Latn:my.Latin.rules.rbbi,Cyrl:my.Cyrillic.rules.rbbi"/&gt;
+ * &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ */
+public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
+ static final String RULEFILES = "rulefiles";
+ private Map<Integer,String> tailored;
+ private ICUTokenizerConfig config;
/** Sole constructor. See {@link AbstractAnalysisFactory} for initialization lifecycle. */
public ICUTokenizerFactory() {}
- // TODO: add support for custom configs
+ @Override
+ public void init(Map<String,String> args) {
+ super.init(args);
+ tailored = new HashMap<Integer,String>();
+ String rulefilesArg = args.get(RULEFILES);
+ if (rulefilesArg != null) {
+ List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
+ for (String scriptAndResourcePath : scriptAndResourcePaths) {
+ int colonPos = scriptAndResourcePath.indexOf(":");
+ String scriptCode = scriptAndResourcePath.substring(0, colonPos).trim();
+ String resourcePath = scriptAndResourcePath.substring(colonPos+1).trim();
+ tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), resourcePath);
+ }
+ }
+ }
+
+ @Override
+ public void inform(ResourceLoader loader) throws IOException {
+ assert tailored != null : "init must be called first!";
+ if (tailored.isEmpty()) {
+ config = new DefaultICUTokenizerConfig();
+ } else {
+ final BreakIterator breakers[] = new BreakIterator[UScript.CODE_LIMIT];
+ for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
+ int code = entry.getKey();
+ String resourcePath = entry.getValue();
+ breakers[code] = parseRules(resourcePath, loader);
+ }
+ config = new DefaultICUTokenizerConfig() {
+
+ @Override
+ public BreakIterator getBreakIterator(int script) {
+ if (breakers[script] != null) {
+ return (BreakIterator) breakers[script].clone();
+ } else {
+ return super.getBreakIterator(script);
+ }
+ }
+ // TODO: we could also allow codes->types mapping
+ };
+ }
+ }
+
+ private BreakIterator parseRules(String filename, ResourceLoader loader) throws IOException {
+ StringBuilder rules = new StringBuilder();
+ InputStream rulesStream = loader.openResource(filename);
+ BufferedReader reader = new BufferedReader
+ (IOUtils.getDecodingReader(rulesStream, IOUtils.CHARSET_UTF_8));
+ String line = null;
+ while ((line = reader.readLine()) != null) {
+ if ( ! line.startsWith("#"))
+ rules.append(line);
+ rules.append('\n');
+ }
+ reader.close();
+ return new RuleBasedBreakIterator(rules.toString());
+ }
+
@Override
public Tokenizer create(Reader input) {
- return new ICUTokenizer(input);
+ assert config != null : "inform must be called first!";
+ return new ICUTokenizer(input, config);
}
}
Modified: lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java?rev=1435287&r1=1435286&r2=1435287&view=diff
==============================================================================
--- lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (original)
+++ lucene/dev/branches/LUCENE-2878/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java Fri Jan 18 18:30:54 2013
@@ -33,18 +33,22 @@ public class ScriptAttributeImpl extends
/** Initializes this attribute with <code>UScript.COMMON</code> */
public ScriptAttributeImpl() {}
+ @Override
public int getCode() {
return code;
}
+ @Override
public void setCode(int code) {
this.code = code;
}
+ @Override
public String getName() {
return UScript.getName(code);
}
+ @Override
public String getShortName() {
return UScript.getShortName(code);
}