You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2018/08/11 10:52:15 UTC
lucene-solr:branch_7x: Merge branch 'jira/lucene-8453' of
https://github.com/mocobeta/lucene-solr-mirror LUCENE-8453: Add documentation
to analysis factories of Korean (Nori) analyzer module This closes #434
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x 59e4bf847 -> d8ecf9761
Merge branch 'jira/lucene-8453' of https://github.com/mocobeta/lucene-solr-mirror
LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer module
This closes #434
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d8ecf976
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d8ecf976
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d8ecf976
Branch: refs/heads/branch_7x
Commit: d8ecf976124eb519e1f8c66e6749e246976a95d9
Parents: 59e4bf8
Author: Uwe Schindler <us...@apache.org>
Authored: Sat Aug 11 12:50:19 2018 +0200
Committer: Uwe Schindler <us...@apache.org>
Committed: Sat Aug 11 12:51:43 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +++
.../ko/KoreanPartOfSpeechStopFilterFactory.java | 17 ++++++++++++++-
.../ko/KoreanReadingFormFilterFactory.java | 8 +++++++
.../analysis/ko/KoreanTokenizerFactory.java | 22 ++++++++++++++++++++
4 files changed, 49 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ac38c29..19d2510 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -119,6 +119,9 @@ Other:
* LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x
cannot read. 1.9.0 can read the old format. (Koji Sekiguchi)
+* LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer
+ module. (Tomoko Uchida via Uwe Schindler)
+
======================= Lucene 7.4.1 =======================
Bug Fixes:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
index 010abc8..70bcef6 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
@@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link KoreanPartOfSpeechStopFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
+ * &lt;analyzer&gt;
+ * &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
+ * &lt;filter class="solr.KoreanPartOfSpeechStopFilterFactory"
+ * tags="E,J"/&gt;
+ * &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
+ *
+ * <p>
+ * Supports the following attributes:
+ * <ul>
+ * <li>tags: List of stop tags. If not specified, {@link KoreanPartOfSpeechStopFilter#DEFAULT_STOP_TAGS} is used.</li>
+ * </ul>
* @lucene.experimental
*/
public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
private Set<POS.Tag> stopTags;
- /** Creates a new JapanesePartOfSpeechStopFilterFactory */
+ /** Creates a new KoreanPartOfSpeechStopFilterFactory */
public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
super(args);
Set<String> stopTagStr = getSet(args, "tags");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
index 860a139..ce2779b 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
@@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
/**
* Factory for {@link KoreanReadingFormFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
+ * &lt;analyzer&gt;
+ * &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
+ * &lt;filter class="solr.KoreanReadingFormFilterFactory"/&gt;
+ * &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
* @lucene.experimental
*/
public class KoreanReadingFormFilterFactory extends TokenFilterFactory {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
index 43a02d3..f2fed27 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
@@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
/**
* Factory for {@link KoreanTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
+ * &lt;analyzer&gt;
+ * &lt;tokenizer class="solr.KoreanTokenizerFactory"
+ * decompoundMode="discard"
+ * userDictionary="user.txt"
+ * userDictionaryEncoding="UTF-8"
+ * outputUnknownUnigrams="false"
+ * /&gt;
+ * &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
+ *
+ * <p>
+ * Supports the following attributes:
+ * <ul>
+ * <li>userDictionary: User dictionary path.</li>
+ * <li>userDictionaryEncoding: User dictionary encoding.</li>
+ * <li>decompoundMode: Decompound mode. One of 'none', 'discard', or 'mixed'. Default is 'discard'. See {@link DecompoundMode}.</li>
+ * <li>outputUnknownUnigrams: If true, outputs unigrams for unknown words.</li>
+ * </ul>
* @lucene.experimental
*/
public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {