You are viewing a plain text version of this content; the original hyperlink to the canonical archived message was lost in extraction. The commit itself is canonically identified by the commit URL given below (http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/99df2bd2).
Posted to commits@lucene.apache.org by ro...@apache.org on 2018/09/20 10:56:57 UTC
lucene-solr:branch_7x: LUCENE-8498: Deprecate LowerCaseTokenizer and
CharTokenizer normalizer methods
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x 26f6956cd -> 99df2bd2f
LUCENE-8498: Deprecate LowerCaseTokenizer and CharTokenizer normalizer methods
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/99df2bd2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/99df2bd2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/99df2bd2
Branch: refs/heads/branch_7x
Commit: 99df2bd2f350b328ce36beb7a6fe76ac5316e551
Parents: 26f6956
Author: Alan Woodward <ro...@apache.org>
Authored: Thu Sep 20 09:07:39 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Thu Sep 20 09:07:39 2018 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +++
.../lucene/analysis/core/LowerCaseTokenizer.java | 3 +++
.../analysis/core/LowerCaseTokenizerFactory.java | 3 +++
.../apache/lucene/analysis/util/CharTokenizer.java | 15 +++++++++++++++
4 files changed, 24 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 05098f9..4c01ba5 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -9,6 +9,9 @@ Build
* LUCENE-8504: Upgrade forbiddenapis to version 2.6. (Uwe Schindler)
+* LUCENE-8498: Deprecate LowerCaseTokenizer and CharTokenizer static methods
+ that take normalizer functions (Alan Woodward)
+
======================= Lucene 7.5.1 =======================
Bug Fixes:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
index 26b8747..b7bea36 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@@ -30,7 +30,10 @@ import org.apache.lucene.util.AttributeFactory;
* Note: this does a decent job for most European languages, but does a terrible
* job for some Asian languages, where words are not separated by spaces.
* </p>
+ *
+ * @deprecated Use {@link LetterTokenizer} followed by {@link LowerCaseFilter}
*/
+@Deprecated
public final class LowerCaseTokenizer extends LetterTokenizer {
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
index 44e2742..8afd78a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
@@ -43,7 +43,10 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
* It is rare to need to change this
* else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN</li>
* </ul>
+ *
+ * @deprecated Use {@link LetterTokenizerFactory} followed by {@link LowerCaseFilterFactory}
*/
+@Deprecated
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
private final int maxTokenLen;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index ff9d6ff..600aafd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -120,7 +120,10 @@ public abstract class CharTokenizer extends Tokenizer {
* <pre class="prettyprint lang-java">
* Tokenizer tok = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toLowerCase);
* </pre>
+ *
+ * @deprecated Normalization should be done in a subsequent TokenFilter
*/
+ @Deprecated
public static CharTokenizer fromTokenCharPredicate(final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
return fromTokenCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, tokenCharPredicate, normalizer);
}
@@ -135,7 +138,10 @@ public abstract class CharTokenizer extends Tokenizer {
* <pre class="prettyprint lang-java">
* Tokenizer tok = CharTokenizer.fromTokenCharPredicate(factory, Character::isLetter, Character::toLowerCase);
* </pre>
+ *
+ * @deprecated Normalization should be done in a subsequent TokenFilter
*/
+ @Deprecated
public static CharTokenizer fromTokenCharPredicate(AttributeFactory factory, final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
Objects.requireNonNull(tokenCharPredicate, "predicate must not be null.");
Objects.requireNonNull(normalizer, "normalizer must not be null");
@@ -192,7 +198,10 @@ public abstract class CharTokenizer extends Tokenizer {
* <pre class="prettyprint lang-java">
* Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(Character::isWhitespace, Character::toLowerCase);
* </pre>
+ *
+ * @deprecated Normalization should be done in a subsequent TokenFilter
*/
+ @Deprecated
public static CharTokenizer fromSeparatorCharPredicate(final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
return fromSeparatorCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, separatorCharPredicate, normalizer);
}
@@ -207,7 +216,10 @@ public abstract class CharTokenizer extends Tokenizer {
* <pre class="prettyprint lang-java">
* Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(factory, Character::isWhitespace, Character::toLowerCase);
* </pre>
+ *
+ * @deprecated Normalization should be done in a subsequent TokenFilter
*/
+ @Deprecated
public static CharTokenizer fromSeparatorCharPredicate(AttributeFactory factory, final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
return fromTokenCharPredicate(factory, separatorCharPredicate.negate(), normalizer);
}
@@ -234,7 +246,10 @@ public abstract class CharTokenizer extends Tokenizer {
* Called on each token character to normalize it before it is added to the
* token. The default implementation does nothing. Subclasses may use this to,
* e.g., lowercase tokens.
+ *
+ * @deprecated Normalization should be done in a subsequent TokenFilter
*/
+ @Deprecated
protected int normalize(int c) {
return c;
}