You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2023/01/01 15:58:55 UTC
[lucene] branch main updated: fix typo analysis-kuromoji (#12047)

This is an automated email from the ASF dual-hosted git repository.

sokolov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 4676a735c18 fix typo analysis-kuromoji (#12047)
4676a735c18 is described below

commit 4676a735c18dd89c1a0234e23fd7d76152a75a5d
Author: twosom <72...@users.noreply.github.com>
AuthorDate: Mon Jan 2 00:58:50 2023 +0900

    fix typo analysis-kuromoji (#12047)
---
 .../org/apache/lucene/analysis/ja/JapaneseNumberFilter.java    | 10 +++++-----
 .../lucene/analysis/ja/completion/CharSequenceUtils.java       |  2 +-
 .../org/apache/lucene/analysis/ja/completion/romaji_map.txt    |  6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
index 7b01751a3ae..73edc374bb8 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
@@ -39,9 +39,9 @@ import org.apache.lucene.util.IgnoreRandomChains;
  * <p>Notice that this analyzer uses a token composition scheme and relies on punctuation tokens
  * being found in the token stream. Please make sure your {@link JapaneseTokenizer} has {@code
  * discardPunctuation} set to false. In case punctuation characters, such as ． (U+FF0E FULLWIDTH
- * FULL STOP), is removed from the token stream, this filter would find input tokens tokens ３ and ２千
- * and give outputs 3 and 2000 instead of 3200, which is likely not the intended result. If you want
- * to remove punctuation characters from your index that are not part of normalized numbers, add a
+ * FULL STOP), are removed from the token stream, this filter would find input tokens ３ and ２千 and
+ * give outputs 3 and 2000 instead of 3200, which is likely not the intended result. If you want to
+ * remove punctuation characters from your index that are not part of normalized numbers, add a
  * {@link org.apache.lucene.analysis.StopFilter} with the punctuation you wish to remove after
  * {@link JapaneseNumberFilter} in your analyzer chain.
  *
@@ -59,8 +59,8 @@ import org.apache.lucene.util.IgnoreRandomChains;
  *   <li>15,7 becomes 157 (be aware of this weakness)
  * </ul>
  *
- * <p>Tokens preceded by a token with {@link PositionIncrementAttribute} of zero are left left
- * untouched and emitted as-is.
+ * <p>Tokens preceded by a token with {@link PositionIncrementAttribute} of zero are left untouched
+ * and emitted as-is.
  *
  * <p>This filter does not use any part-of-speech information for its normalization and the
  * motivation for this is to also support n-grammed token streams in the future.
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
index 7130e6a5577..e25b4b402a6 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
@@ -72,7 +72,7 @@ public class CharSequenceUtils {
     return ch >= 0xff41 && ch <= 0xff5a;
   }
 
-  /** Convert all hiragana in a string into kanataka */
+  /** Convert all hiragana in a string into katakana. */
   public static String toKatakana(CharSequence s) {
     char[] chars = new char[s.length()];
     for (int i = 0; i < s.length(); i++) {
diff --git a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
index 4a803a469ad..8136668e708 100644
--- a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
+++ b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
@@ -1,8 +1,8 @@
-# mapping rules of katakana (an unit of keystroke) to list of acceptable romanizations.
+# mapping rules of katakana (a unit of keystroke) to list of acceptable romanizations.
 # longest-match is used to find entries in this list.
 # covers romanization systems: modified Hepburn-shiki, Kunrei-shiki (Nihon-shiki), and Wāpuro shiki.
 # note: this does not strictly comply with the romanization systems listed above,
-# but tries to cover possible keystoroke supported by various Input Methods.
+# but tries to cover possible keystrokes supported by various Input Methods.
 
 ア,a
 イ,i
@@ -341,4 +341,4 @@
 # Chōonpu (Katakana-Hiragana Prolonged Sound Mark)
 ー,ー
 # Interpunct (Middle Dot)
-・,・
\ No newline at end of file
+・,・