You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2018/09/20 10:56:57 UTC

lucene-solr:branch_7x: LUCENE-8498: Deprecate LowerCaseTokenizer and CharTokenizer normalizer methods

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 26f6956cd -> 99df2bd2f


LUCENE-8498: Deprecate LowerCaseTokenizer and CharTokenizer normalizer methods


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/99df2bd2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/99df2bd2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/99df2bd2

Branch: refs/heads/branch_7x
Commit: 99df2bd2f350b328ce36beb7a6fe76ac5316e551
Parents: 26f6956
Author: Alan Woodward <ro...@apache.org>
Authored: Thu Sep 20 09:07:39 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Thu Sep 20 09:07:39 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                                   |  3 +++
 .../lucene/analysis/core/LowerCaseTokenizer.java     |  3 +++
 .../analysis/core/LowerCaseTokenizerFactory.java     |  3 +++
 .../apache/lucene/analysis/util/CharTokenizer.java   | 15 +++++++++++++++
 4 files changed, 24 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 05098f9..4c01ba5 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -9,6 +9,9 @@ Build
 
 * LUCENE-8504: Upgrade forbiddenapis to version 2.6.  (Uwe Schindler)
 
+* LUCENE-8498: Deprecate LowerCaseTokenizer and CharTokenizer static methods
+  that take normalizer functions.  (Alan Woodward)
+
 ======================= Lucene 7.5.1 =======================
 
 Bug Fixes:

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
index 26b8747..b7bea36 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@@ -30,7 +30,10 @@ import org.apache.lucene.util.AttributeFactory;
  * Note: this does a decent job for most European languages, but does a terrible
  * job for some Asian languages, where words are not separated by spaces.
  * </p>
+ *
+ * @deprecated Use {@link LetterTokenizer} followed by {@link LowerCaseFilter}
  */
+@Deprecated
 public final class LowerCaseTokenizer extends LetterTokenizer {
   
   /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
index 44e2742..8afd78a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
@@ -43,7 +43,10 @@ import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LE
  *     It is rare to need to change this
  * else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN</li>
  * </ul>
+ *
+ * @deprecated Use {@link LetterTokenizerFactory} followed by {@link LowerCaseFilterFactory}
  */
+@Deprecated
 public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
   private final int maxTokenLen;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/99df2bd2/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index ff9d6ff..600aafd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -120,7 +120,10 @@ public abstract class CharTokenizer extends Tokenizer {
    * <pre class="prettyprint lang-java">
    * Tokenizer tok = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toLowerCase);
    * </pre>
+   *
+   * @deprecated Normalization should be done in a subsequent TokenFilter
    */
+  @Deprecated
   public static CharTokenizer fromTokenCharPredicate(final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
     return fromTokenCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, tokenCharPredicate, normalizer);
   }
@@ -135,7 +138,10 @@ public abstract class CharTokenizer extends Tokenizer {
    * <pre class="prettyprint lang-java">
    * Tokenizer tok = CharTokenizer.fromTokenCharPredicate(factory, Character::isLetter, Character::toLowerCase);
    * </pre>
+   *
+   * @deprecated Normalization should be done in a subsequent TokenFilter
    */
+  @Deprecated
   public static CharTokenizer fromTokenCharPredicate(AttributeFactory factory, final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
     Objects.requireNonNull(tokenCharPredicate, "predicate must not be null.");
     Objects.requireNonNull(normalizer, "normalizer must not be null");
@@ -192,7 +198,10 @@ public abstract class CharTokenizer extends Tokenizer {
    * <pre class="prettyprint lang-java">
    * Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(Character::isWhitespace, Character::toLowerCase);
    * </pre>
+   *
+   * @deprecated Normalization should be done in a subsequent TokenFilter
    */
+  @Deprecated
   public static CharTokenizer fromSeparatorCharPredicate(final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
     return fromSeparatorCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, separatorCharPredicate, normalizer);
   }
@@ -207,7 +216,10 @@ public abstract class CharTokenizer extends Tokenizer {
    * <pre class="prettyprint lang-java">
    * Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(factory, Character::isWhitespace, Character::toLowerCase);
    * </pre>
+   *
+   * @deprecated Normalization should be done in a subsequent TokenFilter
    */
+  @Deprecated
   public static CharTokenizer fromSeparatorCharPredicate(AttributeFactory factory, final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
     return fromTokenCharPredicate(factory, separatorCharPredicate.negate(), normalizer);
   }
@@ -234,7 +246,10 @@ public abstract class CharTokenizer extends Tokenizer {
    * Called on each token character to normalize it before it is added to the
    * token. The default implementation does nothing. Subclasses may use this to,
    * e.g., lowercase tokens.
+   *
+   * @deprecated Normalization should be done in a subsequent TokenFilter
    */
+  @Deprecated
   protected int normalize(int c) {
     return c;
   }