You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2023/01/01 15:58:55 UTC
[lucene] branch main updated: fix typo analysis-kuromoji (#12047)
This is an automated email from the ASF dual-hosted git repository.
sokolov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 4676a735c18 fix typo analysis-kuromoji (#12047)
4676a735c18 is described below
commit 4676a735c18dd89c1a0234e23fd7d76152a75a5d
Author: twosom <72...@users.noreply.github.com>
AuthorDate: Mon Jan 2 00:58:50 2023 +0900
fix typo analysis-kuromoji (#12047)
---
.../org/apache/lucene/analysis/ja/JapaneseNumberFilter.java | 10 +++++-----
.../lucene/analysis/ja/completion/CharSequenceUtils.java | 2 +-
.../org/apache/lucene/analysis/ja/completion/romaji_map.txt | 6 +++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
index 7b01751a3ae..73edc374bb8 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
@@ -39,9 +39,9 @@ import org.apache.lucene.util.IgnoreRandomChains;
* <p>Notice that this analyzer uses a token composition scheme and relies on punctuation tokens
* being found in the token stream. Please make sure your {@link JapaneseTokenizer} has {@code
* discardPunctuation} set to false. In case punctuation characters, such as ． (U+FF0E FULLWIDTH
- * FULL STOP), is removed from the token stream, this filter would find input tokens tokens 3 and 2千
- * and give outputs 3 and 2000 instead of 3200, which is likely not the intended result. If you want
- * to remove punctuation characters from your index that are not part of normalized numbers, add a
+ * FULL STOP), are removed from the token stream, this filter would find input tokens 3 and 2千 and
+ * give outputs 3 and 2000 instead of 3200, which is likely not the intended result. If you want to
+ * remove punctuation characters from your index that are not part of normalized numbers, add a
* {@link org.apache.lucene.analysis.StopFilter} with the punctuation you wish to remove after
* {@link JapaneseNumberFilter} in your analyzer chain.
*
@@ -59,8 +59,8 @@ import org.apache.lucene.util.IgnoreRandomChains;
* <li>15,7 becomes 157 (be aware of this weakness)
* </ul>
*
- * <p>Tokens preceded by a token with {@link PositionIncrementAttribute} of zero are left left
- * untouched and emitted as-is.
+ * <p>Tokens preceded by a token with {@link PositionIncrementAttribute} of zero are left untouched
+ * and emitted as-is.
*
* <p>This filter does not use any part-of-speech information for its normalization and the
* motivation for this is to also support n-grammed token streams in the future.
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
index 7130e6a5577..e25b4b402a6 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/completion/CharSequenceUtils.java
@@ -72,7 +72,7 @@ public class CharSequenceUtils {
return ch >= 0xff41 && ch <= 0xff5a;
}
- /** Convert all hiragana in a string into kanataka */
+ /** Convert all hiragana in a string into Katakana */
public static String toKatakana(CharSequence s) {
char[] chars = new char[s.length()];
for (int i = 0; i < s.length(); i++) {
diff --git a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
index 4a803a469ad..8136668e708 100644
--- a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
+++ b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/completion/romaji_map.txt
@@ -1,8 +1,8 @@
-# mapping rules of katakana (an unit of keystroke) to list of acceptable romanizations.
+# mapping rules of katakana (a unit of keystroke) to list of acceptable romanizations.
# longest-match is used to find entries in this list.
# covers romanization systems: modified Hepburn-shiki, Kunrei-shiki (Nihon-shiki), and Wāpuro shiki.
# note: this does not strictly comply with the romanization systems listed above,
-# but tries to cover possible keystoroke supported by various Input Methods.
+# but tries to cover possible keystrokes supported by various Input Methods.
ア,a
イ,i
@@ -341,4 +341,4 @@
# Chōonpu (Katakana-Hiragana Prolonged Sound Mark)
ー,ー
# Interpunct (Middle Dot)
-・,・
\ No newline at end of file
+・,・