You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2018/08/11 10:52:15 UTC
lucene-solr:branch_7x: Merge branch 'jira/lucene-8453' of https://github.com/mocobeta/lucene-solr-mirror LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer module This closes #434

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 59e4bf847 -> d8ecf9761


Merge branch 'jira/lucene-8453' of https://github.com/mocobeta/lucene-solr-mirror
LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer module
This closes #434


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d8ecf976
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d8ecf976
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d8ecf976

Branch: refs/heads/branch_7x
Commit: d8ecf976124eb519e1f8c66e6749e246976a95d9
Parents: 59e4bf8
Author: Uwe Schindler <us...@apache.org>
Authored: Sat Aug 11 12:50:19 2018 +0200
Committer: Uwe Schindler <us...@apache.org>
Committed: Sat Aug 11 12:51:43 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +++
 .../ko/KoreanPartOfSpeechStopFilterFactory.java | 17 ++++++++++++++-
 .../ko/KoreanReadingFormFilterFactory.java      |  8 +++++++
 .../analysis/ko/KoreanTokenizerFactory.java     | 22 ++++++++++++++++++++
 4 files changed, 49 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ac38c29..19d2510 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -119,6 +119,9 @@ Other:
 * LUCENE-8420: Upgrade OpenNLP to 1.9.0 so OpenNLP tool can read the new model format which 1.8.x
   cannot read. 1.9.0 can read the old format. (Koji Sekiguchi)
 
+* LUCENE-8453: Add documentation to analysis factories of Korean (Nori) analyzer
+  module.  (Tomoko Uchida via Uwe Schindler)
+
 ======================= Lucene 7.4.1 =======================
 
 Bug Fixes:

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
index 010abc8..70bcef6 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanPartOfSpeechStopFilterFactory.java
@@ -25,12 +25,27 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
  * Factory for {@link KoreanPartOfSpeechStopFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
+ *    &lt;analyzer&gt;
+ *      &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
+ *      &lt;filter class="solr.KoreanPartOfSpeechStopFilterFactory"
+ *              tags="E,J"/&gt;
+ *    &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
+ *
+ * <p>
+ * Supports the following attributes:
+ * <ul>
+ *   <li>tags: List of stop tags. If not specified, {@link KoreanPartOfSpeechStopFilter#DEFAULT_STOP_TAGS} is used.</li>
+ * </ul>
  * @lucene.experimental
  */
 public class KoreanPartOfSpeechStopFilterFactory extends TokenFilterFactory {
   private Set<POS.Tag> stopTags;
 
-  /** Creates a new JapanesePartOfSpeechStopFilterFactory */
+  /** Creates a new KoreanPartOfSpeechStopFilterFactory */
   public KoreanPartOfSpeechStopFilterFactory(Map<String,String> args) {
     super(args);
     Set<String> stopTagStr = getSet(args, "tags");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
index 860a139..ce2779b 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanReadingFormFilterFactory.java
@@ -23,6 +23,14 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 
 /**
  * Factory for {@link KoreanReadingFormFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.KoreanTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.KoreanReadingFormFilterFactory"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
  * @lucene.experimental
  */
 public class KoreanReadingFormFilterFactory extends TokenFilterFactory {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d8ecf976/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
index 43a02d3..f2fed27 100644
--- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
+++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizerFactory.java
@@ -32,9 +32,31 @@ import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.ko.KoreanTokenizer.DecompoundMode;
 
 /**
  * Factory for {@link KoreanTokenizer}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_ko" class="solr.TextField"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.KoreanTokenizerFactory"
+ *                decompoundMode="discard"
+ *                userDictionary="user.txt"
+ *                userDictionaryEncoding="UTF-8"
+ *                outputUnknownUnigrams="false"
+ *     /&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;
+ * </pre>
+ *
+ * <p>
+ * Supports the following attributes:
+ * <ul>
+ *   <li>userDictionary: User dictionary path.</li>
+ *   <li>userDictionaryEncoding: User dictionary encoding.</li>
+ *   <li>decompoundMode: Decompound mode. One of 'none', 'discard', or 'mixed'. Default is 'discard'. See {@link DecompoundMode}.</li>
+ *   <li>outputUnknownUnigrams: If true, outputs unigrams for unknown words.</li>
+ * </ul>
  * @lucene.experimental
  */
 public class KoreanTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {