You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2017/06/25 02:06:53 UTC
[31/47] lucene-solr:feature/autoscaling: LUCENE-7867: Remove
deprecated Token class
LUCENE-7867: Remove deprecated Token class
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/af1ee47f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/af1ee47f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/af1ee47f
Branch: refs/heads/feature/autoscaling
Commit: af1ee47f2bb2e19f39f7bef4be09e375ca84a52b
Parents: 69d0c70
Author: Alan Woodward <ro...@apache.org>
Authored: Thu Jun 8 14:43:43 2017 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Fri Jun 23 09:51:05 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +
.../payloads/NumericPayloadTokenFilter.java | 7 +-
.../payloads/TypeAsPayloadTokenFilter.java | 6 +-
.../java/org/apache/lucene/analysis/Token.java | 210 -------------------
.../org/apache/lucene/analysis/TokenStream.java | 7 +-
.../tokenattributes/CharTermAttributeImpl.java | 5 -
.../lucene/search/highlight/TokenGroup.java | 15 +-
.../queryparser/classic/FastCharStream.java | 2 +-
.../standard/parser/FastCharStream.java | 2 +-
.../surround/parser/FastCharStream.java | 2 +-
.../java/org/apache/lucene/analysis/Token.java | 195 +++++++++++++++++
.../handler/component/SpellCheckComponent.java | 4 +-
.../org/apache/solr/parser/FastCharStream.java | 2 +-
.../spelling/AbstractLuceneSpellChecker.java | 12 +-
.../spelling/ConjunctionSolrSpellChecker.java | 1 -
.../solr/spelling/DirectSolrSpellChecker.java | 1 -
.../solr/spelling/PossibilityIterator.java | 2 -
.../apache/solr/spelling/QueryConverter.java | 7 +-
.../org/apache/solr/spelling/ResultEntry.java | 2 -
.../apache/solr/spelling/SolrSpellChecker.java | 13 +-
.../solr/spelling/SpellCheckCollator.java | 5 +-
.../solr/spelling/SpellCheckCorrection.java | 1 -
.../apache/solr/spelling/SpellingOptions.java | 5 +-
.../solr/spelling/SpellingQueryConverter.java | 1 -
.../apache/solr/spelling/SpellingResult.java | 5 +-
.../solr/spelling/SuggestQueryConverter.java | 2 -
.../java/org/apache/solr/spelling/Token.java | 175 ++++++++++++++++
.../spelling/WordBreakSolrSpellChecker.java | 1 -
.../apache/solr/spelling/suggest/Suggester.java | 2 +-
.../component/DummyCustomParamSpellChecker.java | 14 +-
.../spelling/DirectSolrSpellCheckerTest.java | 1 -
.../spelling/FileBasedSpellCheckerTest.java | 1 -
.../spelling/IndexBasedSpellCheckerTest.java | 1 -
.../solr/spelling/SimpleQueryConverter.java | 8 +-
.../spelling/SpellPossibilityIteratorTest.java | 2 -
.../spelling/SpellingQueryConverterTest.java | 9 +-
.../spelling/TestSuggestSpellingConverter.java | 10 +-
.../spelling/WordBreakSolrSpellCheckerTest.java | 1 -
38 files changed, 429 insertions(+), 313 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8365017..eede65b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -86,6 +86,9 @@ API Changes
* LUCENE-7877: PrefixAwareTokenStream is replaced with ConcatenatingTokenStream
(Alan Woodward, Uwe Schindler, Adrien Grand)
+* LUCENE-7867: The deprecated Token class is now only available in the test
+ framework (Alan Woodward, Adrien Grand)
+
Bug Fixes
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
index e5a4a45..81c5dd4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
@@ -17,18 +17,17 @@
package org.apache.lucene.analysis.payloads;
+import java.io.IOException;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
-
/**
- * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.Token#type()}
- *
+ * Assigns a payload to a token based on the {@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}
**/
public class NumericPayloadTokenFilter extends TokenFilter {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
index 92fc76a..9dabe79 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
@@ -17,17 +17,17 @@
package org.apache.lucene.analysis.payloads;
+import java.io.IOException;
+
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.BytesRef;
-import java.io.IOException;
-
/**
- * Makes the {@link org.apache.lucene.analysis.Token#type()} a payload.
+ * Makes the {@link TypeAttribute} a payload.
*
* Encodes the type using {@link String#getBytes(String)} with "UTF-8" as the encoding
*
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/core/src/java/org/apache/lucene/analysis/Token.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Token.java b/lucene/core/src/java/org/apache/lucene/analysis/Token.java
deleted file mode 100644
index 77ab85e..0000000
--- a/lucene/core/src/java/org/apache/lucene/analysis/Token.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis;
-
-
-import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
-import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeReflector;
-import org.apache.lucene.util.BytesRef;
-
-/**
- A Token is an occurrence of a term from the text of a field. It consists of
- a term's text, the start and end offset of the term in the text of the field,
- and a type string.
- <p>
- The start and end offsets permit applications to re-associate a token with
- its source text, e.g., to display highlighted query terms in a document
- browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
- display, etc.
- <p>
- The type is a string, assigned by a lexical analyzer
- (a.k.a. tokenizer), naming the lexical or syntactic class that the token
- belongs to. For example an end of sentence marker token might be implemented
- with type "eos". The default token type is "word".
- <p>
- A Token can optionally have metadata (a.k.a. payload) in the form of a variable
- length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
- payloads from the index.
-
- <br><br>
-
- <p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
- that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
- Even though it is not necessary to use Token anymore, with the new TokenStream API it can
- be used as convenience class that implements all {@link Attribute}s, which is especially useful
- to easily switch from the old to the new TokenStream API.
-
- A few things to note:
- <ul>
- <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
- <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
- <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
- <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
- </ul>
- <p>
- <b>Please note:</b> With Lucene 3.1, the <code>{@linkplain #toString toString()}</code> method had to be changed to match the
- {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}.
- This method now only prints the term text, no additional information anymore.
- @deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally!
-*/
-@Deprecated
-public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
-
- private int flags;
- private BytesRef payload;
-
- /** Constructs a Token will null text. */
- public Token() {
- }
-
- /** Constructs a Token with the given term text, start
- * and end offsets. The type defaults to "word."
- * <b>NOTE:</b> for better indexing speed you should
- * instead use the char[] termBuffer methods to set the
- * term text.
- * @param text term text
- * @param start start offset in the source text
- * @param end end offset in the source text
- */
- public Token(CharSequence text, int start, int end) {
- append(text);
- setOffset(start, end);
- }
-
- /**
- * {@inheritDoc}
- * @see FlagsAttribute
- */
- @Override
- public int getFlags() {
- return flags;
- }
-
- /**
- * {@inheritDoc}
- * @see FlagsAttribute
- */
- @Override
- public void setFlags(int flags) {
- this.flags = flags;
- }
-
- /**
- * {@inheritDoc}
- * @see PayloadAttribute
- */
- @Override
- public BytesRef getPayload() {
- return this.payload;
- }
-
- /**
- * {@inheritDoc}
- * @see PayloadAttribute
- */
- @Override
- public void setPayload(BytesRef payload) {
- this.payload = payload;
- }
-
- /** Resets the term text, payload, flags, positionIncrement, positionLength,
- * startOffset, endOffset and token type to default.
- */
- @Override
- public void clear() {
- super.clear();
- flags = 0;
- payload = null;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (obj == this)
- return true;
-
- if (obj instanceof Token) {
- final Token other = (Token) obj;
- return (
- flags == other.flags &&
- (payload == null ? other.payload == null : payload.equals(other.payload)) &&
- super.equals(obj)
- );
- } else
- return false;
- }
-
- @Override
- public int hashCode() {
- int code = super.hashCode();
- code = code * 31 + flags;
- if (payload != null) {
- code = code * 31 + payload.hashCode();
- }
- return code;
- }
-
- @Override
- public Token clone() {
- final Token t = (Token) super.clone();
- if (payload != null) {
- t.payload = payload.clone();
- }
- return t;
- }
-
- /**
- * Copy the prototype token's fields into this one. Note: Payloads are shared.
- * @param prototype source Token to copy fields from
- */
- public void reinit(Token prototype) {
- // this is a bad hack to emulate no cloning of payload!
- prototype.copyToWithoutPayloadClone(this);
- }
-
- private void copyToWithoutPayloadClone(AttributeImpl target) {
- super.copyTo(target);
- ((FlagsAttribute) target).setFlags(flags);
- ((PayloadAttribute) target).setPayload(payload);
- }
-
- @Override
- public void copyTo(AttributeImpl target) {
- super.copyTo(target);
- ((FlagsAttribute) target).setFlags(flags);
- ((PayloadAttribute) target).setPayload((payload == null) ? null : payload.clone());
- }
-
- @Override
- public void reflectWith(AttributeReflector reflector) {
- super.reflectWith(reflector);
- reflector.reflect(FlagsAttribute.class, "flags", flags);
- reflector.reflect(PayloadAttribute.class, "payload", payload);
- }
-
- /** Convenience factory that returns <code>Token</code> as implementation for the basic
- * attributes and return the default impl (with "Impl" appended) for all other
- * attributes.
- * @since 3.0
- */
- public static final AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
- AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class);
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java b/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
index af1e7bd..a19d31d 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/TokenStream.java
@@ -40,12 +40,7 @@ import org.apache.lucene.util.AttributeSource;
* <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
* <code>TokenStream</code>.
* </ul>
- * A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
- * has moved from being {@link Token}-based to {@link Attribute}-based. While
- * {@link Token} still exists in 2.9 as a convenience class, the preferred way
- * to store the information of a {@link Token} is to use {@link AttributeImpl}s.
- * <p>
- * <code>TokenStream</code> now extends {@link AttributeSource}, which provides
+ * <code>TokenStream</code> extends {@link AttributeSource}, which provides
* access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
* Note that only one instance per {@link AttributeImpl} is created and reused
* for every token. This approach reduces object creation and allows local
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
index cde8dd9..9a5b9fa 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/tokenattributes/CharTermAttributeImpl.java
@@ -253,11 +253,6 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr
/**
* Returns solely the term text as specified by the
* {@link CharSequence} interface.
- * <p>This method changed the behavior with Lucene 3.1,
- * before it returned a String representation of the whole
- * term with all attributes.
- * This affects especially the
- * {@link org.apache.lucene.analysis.Token} subclass.
*/
@Override
public String toString() {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
index 6af89f8..ebb37d7 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.search.highlight;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -29,7 +28,6 @@ public class TokenGroup {
private static final int MAX_NUM_TOKENS_PER_GROUP = 50;
- private Token[] tokens = new Token[MAX_NUM_TOKENS_PER_GROUP];
private float[] scores = new float[MAX_NUM_TOKENS_PER_GROUP];
private int numTokens = 0;
private int startOffset = 0;
@@ -68,10 +66,7 @@ public class TokenGroup {
tot += score;
}
}
- Token token = new Token();
- token.setOffset(termStartOffset, termEndOffset);
- token.setEmpty().append(termAtt);
- tokens[numTokens] = token;
+
scores[numTokens] = score;
numTokens++;
}
@@ -87,14 +82,6 @@ public class TokenGroup {
}
/**
- * @param index a value between 0 and numTokens -1
- * @return the "n"th token
- */
- public Token getToken(int index) {
- return tokens[index];
- }
-
- /**
*
* @param index a value between 0 and numTokens -1
* @return the "n"th score
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
index ad0646b..d528111 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
@@ -21,7 +21,7 @@ import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
* */
public final class FastCharStream implements CharStream {
char[] buffer = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
index 06bf9ab..ee0f9af 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
@@ -21,7 +21,7 @@ import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
* */
public final class FastCharStream implements CharStream {
char[] buffer = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
index d3cc18b..3a033f5 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/FastCharStream.java
@@ -21,7 +21,7 @@ import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API. */
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. */
public final class FastCharStream implements CharStream {
char[] buffer = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java
new file mode 100644
index 0000000..04b1df8
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/Token.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ A Token is an occurrence of a term from the text of a field. It consists of
+ a term's text, the start and end offset of the term in the text of the field,
+ and a type string.
+ <p>
+ The start and end offsets permit applications to re-associate a token with
+ its source text, e.g., to display highlighted query terms in a document
+ browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
+ display, etc.
+ <p>
+ The type is a string, assigned by a lexical analyzer
+ (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+ belongs to. For example an end of sentence marker token might be implemented
+ with type "eos". The default token type is "word".
+ <p>
+ A Token can optionally have metadata (a.k.a. payload) in the form of a variable
+ length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
+ payloads from the index.
+
+ A few things to note:
+ <ul>
+ <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
+ <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
+ <li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
+ <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
+ </ul>
+*/
+public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
+
+ private int flags;
+ private BytesRef payload;
+
+ /** Constructs a Token will null text. */
+ public Token() {
+ }
+
+ /** Constructs a Token with the given term text, start
+ * and end offsets. The type defaults to "word."
+ * <b>NOTE:</b> for better indexing speed you should
+ * instead use the char[] termBuffer methods to set the
+ * term text.
+ * @param text term text
+ * @param start start offset in the source text
+ * @param end end offset in the source text
+ */
+ public Token(CharSequence text, int start, int end) {
+ append(text);
+ setOffset(start, end);
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see FlagsAttribute
+ */
+ @Override
+ public int getFlags() {
+ return flags;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see FlagsAttribute
+ */
+ @Override
+ public void setFlags(int flags) {
+ this.flags = flags;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see PayloadAttribute
+ */
+ @Override
+ public BytesRef getPayload() {
+ return this.payload;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see PayloadAttribute
+ */
+ @Override
+ public void setPayload(BytesRef payload) {
+ this.payload = payload;
+ }
+
+ /** Resets the term text, payload, flags, positionIncrement, positionLength,
+ * startOffset, endOffset and token type to default.
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ flags = 0;
+ payload = null;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == this)
+ return true;
+
+ if (obj instanceof Token) {
+ final Token other = (Token) obj;
+ return (
+ flags == other.flags &&
+ (payload == null ? other.payload == null : payload.equals(other.payload)) &&
+ super.equals(obj)
+ );
+ } else
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int code = super.hashCode();
+ code = code * 31 + flags;
+ if (payload != null) {
+ code = code * 31 + payload.hashCode();
+ }
+ return code;
+ }
+
+ @Override
+ public Token clone() {
+ final Token t = (Token) super.clone();
+ if (payload != null) {
+ t.payload = BytesRef.deepCopyOf(payload);
+ }
+ return t;
+ }
+
+ /**
+ * Copy the prototype token's fields into this one. Note: Payloads are shared.
+ * @param prototype source Token to copy fields from
+ */
+ public void reinit(Token prototype) {
+ // this is a bad hack to emulate no cloning of payload!
+ prototype.copyToWithoutPayloadClone(this);
+ }
+
+ private void copyToWithoutPayloadClone(AttributeImpl target) {
+ super.copyTo(target);
+ ((FlagsAttribute) target).setFlags(flags);
+ ((PayloadAttribute) target).setPayload(payload);
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ super.copyTo(target);
+ ((FlagsAttribute) target).setFlags(flags);
+ ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
+ }
+
+ @Override
+ public void reflectWith(AttributeReflector reflector) {
+ super.reflectWith(reflector);
+ reflector.reflect(FlagsAttribute.class, "flags", flags);
+ reflector.reflect(PayloadAttribute.class, "payload", payload);
+ }
+
+ /** Convenience factory that returns <code>Token</code> as implementation for the basic
+ * attributes and return the default impl (with "Impl" appended) for all other
+ * attributes.
+ * @since 3.0
+ */
+ public static final AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
+ AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class);
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index 6aba296..c881aa6 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -31,7 +31,6 @@ import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -60,8 +59,8 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
-import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SyntaxError;
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
import org.apache.solr.spelling.IndexBasedSpellChecker;
@@ -72,6 +71,7 @@ import org.apache.solr.spelling.SpellCheckCollator;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingQueryConverter;
import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/parser/FastCharStream.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/parser/FastCharStream.java b/solr/core/src/java/org/apache/solr/parser/FastCharStream.java
index 712a283..7039c0a 100644
--- a/solr/core/src/java/org/apache/solr/parser/FastCharStream.java
+++ b/solr/core/src/java/org/apache/solr/parser/FastCharStream.java
@@ -21,7 +21,7 @@ import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
- * org.apache.lucene.analysis.Token} API.
+ * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
* */
public final class FastCharStream implements CharStream {
char[] buffer = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
index 22da107..a03e911 100644
--- a/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
@@ -16,7 +16,6 @@
*/
package org.apache.solr.spelling;
-import org.apache.lucene.search.spell.StringDistance;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
@@ -24,19 +23,18 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.search.spell.SuggestWord;
-import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
-import org.apache.lucene.search.spell.SuggestWordQueue;
-
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.SpellChecker;
+import org.apache.lucene.search.spell.StringDistance;
+import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
+import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
index 881b4d5..2daab28 100644
--- a/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
@@ -26,7 +26,6 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
index 15fee72..a1f8df8 100644
--- a/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/DirectSolrSpellChecker.java
@@ -22,7 +22,6 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.DirectSpellChecker;
import org.apache.lucene.search.spell.StringDistance;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
index 0203f18..3873e98 100644
--- a/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
+++ b/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
@@ -29,8 +29,6 @@ import java.util.NoSuchElementException;
import java.util.PriorityQueue;
import java.util.Set;
-import org.apache.lucene.analysis.Token;
-
/**
* <p>
* Given a list of possible Spelling Corrections for multiple mis-spelled words
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java
index edb94c4..3c3a42b 100644
--- a/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java
+++ b/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java
@@ -15,13 +15,12 @@
* limitations under the License.
*/
package org.apache.solr.spelling;
+import java.util.Collection;
+
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
-import java.util.Collection;
-
/**
* <p>
* The QueryConverter is an abstract base class defining a method for converting
@@ -81,7 +80,7 @@ public abstract class QueryConverter implements NamedListInitializedPlugin {
}
/**
- * Returns the Collection of {@link org.apache.lucene.analysis.Token}s for
+ * Returns the Collection of {@link Token}s for
* the query. Offsets on the Token should correspond to the correct
* offset in the origQuery
*/
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java b/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java
index 4b667cd..dd0310b 100644
--- a/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java
+++ b/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java
@@ -16,8 +16,6 @@
*/
package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
-
public class ResultEntry {
public Token token;
public String suggestion;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
index db0d5ff..bb461ab 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
@@ -15,8 +15,13 @@
* limitations under the License.
*/
package org.apache.solr.spelling;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.StringDistance;
@@ -31,12 +36,6 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrIndexSearcher;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-
/**
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
index cc38898..859d84f 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
@@ -15,15 +15,12 @@
* limitations under the License.
*/
package org.apache.solr.spelling;
-import static org.apache.solr.common.params.CommonParams.ID;
-
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.CursorMarkParams;
@@ -43,6 +40,8 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.solr.common.params.CommonParams.ID;
+
public class SpellCheckCollator {
private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private int maxCollations = 1;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java
index cae0256..034690b 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCorrection.java
@@ -15,7 +15,6 @@
* limitations under the License.
*/
package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
public class SpellCheckCorrection {
private Token original;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java b/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java
index e8dd2f0..b5b42f3 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellingOptions.java
@@ -16,13 +16,12 @@
*/
package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
+import java.util.Collection;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.solr.common.params.SolrParams;
-import java.util.Collection;
-
/**
*
*
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
index 4cc75b5..a5292c7 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
@@ -23,7 +23,6 @@ import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java b/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java
index de98c22..fb13bbc 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellingResult.java
@@ -15,7 +15,6 @@
* limitations under the License.
*/
package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
import java.util.Collection;
import java.util.LinkedHashMap;
@@ -80,7 +79,7 @@ public class SpellingResult {
/**
* Suggestions must be added with the best suggestion first. ORDER is important.
- * @param token The {@link org.apache.lucene.analysis.Token}
+ * @param token The {@link Token}
* @param suggestion The suggestion for the Token
* @param docFreq The document frequency
*/
@@ -97,7 +96,7 @@ public class SpellingResult {
/**
* Gets the suggestions for the given token.
*
- * @param token The {@link org.apache.lucene.analysis.Token} to look up
+ * @param token The {@link Token} to look up
* @return A LinkedHashMap of the suggestions. Key is the suggestion, value is the token frequency in the index, else {@link #NO_FREQUENCY_INFO}.
*
* The suggestions are added in sorted order (i.e. best suggestion first) then the iterator will return the suggestions in order
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
index a806973..33ad41e 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
@@ -21,8 +21,6 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
-import org.apache.lucene.analysis.Token;
-
/**
* Passes the entire query string to the configured analyzer as-is.
**/
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/Token.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/Token.java b/solr/core/src/java/org/apache/solr/spelling/Token.java
new file mode 100644
index 0000000..b98d350
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/spelling/Token.java
@@ -0,0 +1,175 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.spelling;
+
+
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ A Token is an occurrence of a term from the text of a field. It consists of
+ a term's text, the start and end offset of the term in the text of the field,
+ and a type string.
+ <p>
+ The start and end offsets permit applications to re-associate a token with
+ its source text, e.g., to display highlighted query terms in a document
+ browser, or to show matching text fragments in a <a href="http://en.wikipedia.org/wiki/Key_Word_in_Context">KWIC</a>
+ display, etc.
+ <p>
+ The type is a string, assigned by a lexical analyzer
+ (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+ belongs to. For example an end of sentence marker token might be implemented
+ with type "eos". The default token type is "word".
+ <p>
+ A Token can optionally have metadata (a.k.a. payload) in the form of a variable
+ length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
+ payloads from the index.
+
+ A few things to note:
+ <ul>
+ <li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
+ <li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
+ <li>The startOffset and endOffset represent the start and end offset in the source text, so be careful in adjusting them.</li>
+ <li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
+ </ul>
+ */
+@Deprecated
+public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute {
+
+ // TODO Refactor the spellchecker API to use TokenStreams properly, rather than this hack
+
+ private int flags;
+ private BytesRef payload;
+
+ /** Constructs a Token with null text. */
+ public Token() {
+ }
+
+ /** Constructs a Token with the given term text, start
+ * and end offsets. The type defaults to "word."
+ * <b>NOTE:</b> for better indexing speed you should
+ * instead use the char[] termBuffer methods to set the
+ * term text.
+ * @param text term text
+ * @param start start offset in the source text
+ * @param end end offset in the source text
+ */
+ public Token(CharSequence text, int start, int end) {
+ append(text);
+ setOffset(start, end);
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see FlagsAttribute
+ */
+ @Override
+ public int getFlags() {
+ return flags;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see FlagsAttribute
+ */
+ @Override
+ public void setFlags(int flags) {
+ this.flags = flags;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see PayloadAttribute
+ */
+ @Override
+ public BytesRef getPayload() {
+ return this.payload;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @see PayloadAttribute
+ */
+ @Override
+ public void setPayload(BytesRef payload) {
+ this.payload = payload;
+ }
+
+ /** Resets the term text, payload, flags, positionIncrement, positionLength,
+ * startOffset, endOffset and token type to default.
+ */
+ @Override
+ public void clear() {
+ super.clear();
+ flags = 0;
+ payload = null;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == this)
+ return true;
+
+ if (obj instanceof Token) {
+ final Token other = (Token) obj;
+ return (
+ flags == other.flags &&
+ (payload == null ? other.payload == null : payload.equals(other.payload)) &&
+ super.equals(obj)
+ );
+ } else
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ int code = super.hashCode();
+ code = code * 31 + flags;
+ if (payload != null) {
+ code = code * 31 + payload.hashCode();
+ }
+ return code;
+ }
+
+ @Override
+ public Token clone() {
+ final Token t = (Token) super.clone();
+ if (payload != null) {
+ t.payload = BytesRef.deepCopyOf(payload);
+ }
+ return t;
+ }
+
+ @Override
+ public void copyTo(AttributeImpl target) {
+ super.copyTo(target);
+ ((FlagsAttribute) target).setFlags(flags);
+ ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
+ }
+
+ @Override
+ public void reflectWith(AttributeReflector reflector) {
+ super.reflectWith(reflector);
+ reflector.reflect(FlagsAttribute.class, "flags", flags);
+ reflector.reflect(PayloadAttribute.class, "payload", payload);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
index a5d692b..f96233f 100644
--- a/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
+++ b/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
@@ -24,7 +24,6 @@ import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spell.CombineSuggestion;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
index d585fed..c0e7709 100644
--- a/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
@@ -28,7 +28,6 @@ import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.HighFrequencyDictionary;
@@ -47,6 +46,7 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java b/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
index 30924c3..10f7cc0 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
@@ -16,18 +16,18 @@
*/
package org.apache.solr.handler.component;
-import org.apache.lucene.analysis.Token;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.spelling.SolrSpellChecker;
-import org.apache.solr.spelling.SpellingOptions;
-import org.apache.solr.spelling.SpellingResult;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
+
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.spelling.SolrSpellChecker;
+import org.apache.solr.spelling.SpellingOptions;
+import org.apache.solr.spelling.SpellingResult;
+import org.apache.solr.spelling.Token;
/**
* A Dummy SpellChecker for testing purposes
*
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
index 1cde8d2..cb5bba7 100644
--- a/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/DirectSolrSpellCheckerTest.java
@@ -19,7 +19,6 @@ package org.apache.solr.spelling;
import java.util.Collection;
import java.util.Map;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.SpellingParams;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
index f107024..800a2a0 100644
--- a/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
@@ -20,7 +20,6 @@ import java.io.File;
import java.util.Collection;
import java.util.Map;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
import org.apache.solr.SolrTestCaseJ4;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
index b221044..0819083 100644
--- a/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
@@ -22,7 +22,6 @@ import java.util.Comparator;
import java.util.Date;
import java.util.Map;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java b/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
index d2877d9..8d91a1b 100644
--- a/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
@@ -16,7 +16,10 @@
*/
package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import java.util.Collection;
-import java.util.HashSet;
-import java.io.IOException;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java
index 1e69b73..ff53e04 100644
--- a/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java
@@ -20,9 +20,7 @@ import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.analysis.Token;
import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.spelling.PossibilityIterator;
import org.junit.Before;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
index 821fe73..11a31c2 100644
--- a/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
@@ -16,16 +16,15 @@
*/
package org.apache.solr.spelling;
-import org.apache.lucene.analysis.Token;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.common.util.NamedList;
import org.junit.Test;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
/**
* Test for SpellingQueryConverter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
index fdf64ff..0e4a011 100644
--- a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
+++ b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
@@ -22,11 +22,9 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
@@ -65,7 +63,11 @@ public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
public void assertConvertsTo(String text, String expected[]) throws IOException {
Collection<Token> tokens = converter.convert(text);
- TokenStream ts = new CannedTokenStream(tokens.toArray(new Token[0]));
- assertTokenStreamContents(ts, expected);
+ assertEquals(tokens.size(), expected.length);
+ int i = 0;
+ for (Token token : tokens) {
+ assertEquals(token.toString(), expected[i]);
+ i++;
+ }
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/af1ee47f/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java b/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
index 92e06bb..f24bcba 100644
--- a/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
@@ -21,7 +21,6 @@ import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.NamedList;