You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2018/08/11 12:09:42 UTC
lucene-solr:branch_7x: SOLR-12655: Add Korean morphological analyzer
("nori") to default distribution. This also adds examples for configuration
in Solr's schema
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x 924114329 -> 489a91577
SOLR-12655: Add Korean morphological analyzer ("nori") to default distribution. This also adds examples for configuration in Solr's schema
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/489a9157
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/489a9157
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/489a9157
Branch: refs/heads/branch_7x
Commit: 489a9157791efe8d26f86fc448bed5992b3d2d5f
Parents: 9241143
Author: Uwe Schindler <us...@apache.org>
Authored: Sat Aug 11 14:07:31 2018 +0200
Committer: Uwe Schindler <us...@apache.org>
Committed: Sat Aug 11 14:08:39 2018 +0200
----------------------------------------------------------------------
solr/CHANGES.txt | 3 ++
solr/common-build.xml | 6 ++--
.../configsets/_default/conf/managed-schema | 34 ++++++++++++++++++++
.../example-DIH/solr/db/conf/managed-schema | 34 ++++++++++++++++++++
.../example-DIH/solr/mail/conf/managed-schema | 34 ++++++++++++++++++++
.../example-DIH/solr/solr/conf/managed-schema | 34 ++++++++++++++++++++
solr/example/files/conf/managed-schema | 9 ++++++
.../configsets/_default/conf/managed-schema | 34 ++++++++++++++++++++
.../conf/managed-schema | 34 ++++++++++++++++++++
9 files changed, 220 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index dbb5289..900621a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -112,6 +112,9 @@ New Features
* SOLR-12485: Uploading docs in XML now supports child documents as field values, thus providing a label to the
relationship instead of the current "anonymous" relationship. (Moshe Bla, David Smiley)
+* SOLR-12655: Add Korean morphological analyzer ("nori") to default distribution. This also adds examples
+ for configuration in Solr's schema. (Uwe Schindler)
+
Bug Fixes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/common-build.xml
----------------------------------------------------------------------
diff --git a/solr/common-build.xml b/solr/common-build.xml
index bfa1a86..2a0ad51 100644
--- a/solr/common-build.xml
+++ b/solr/common-build.xml
@@ -94,6 +94,7 @@
-->
<pathelement location="${analyzers-common.jar}"/>
<pathelement location="${analyzers-kuromoji.jar}"/>
+ <pathelement location="${analyzers-nori.jar}"/>
<pathelement location="${analyzers-phonetic.jar}"/>
<pathelement location="${codecs.jar}"/>
<pathelement location="${backward-codecs.jar}"/>
@@ -171,7 +172,7 @@
<target name="prep-lucene-jars"
depends="resolve-groovy,
- jar-lucene-core, jar-backward-codecs, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
+ jar-lucene-core, jar-backward-codecs, jar-analyzers-phonetic, jar-analyzers-kuromoji, jar-analyzers-nori, jar-codecs,jar-expressions, jar-suggest, jar-highlighter, jar-memory,
jar-misc, jar-spatial-extras, jar-spatial3d, jar-grouping, jar-queries, jar-queryparser, jar-join, jar-sandbox, jar-classification">
<property name="solr.deps.compiled" value="true"/>
</target>
@@ -248,7 +249,7 @@
<property name="lucenedocs" location="${common.dir}/build/docs"/>
<!-- dependency to ensure all lucene javadocs are present -->
- <target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
+ <target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-nori,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-backward-codecs,javadocs-codecs,javadocs-expressions,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial-extras,javadocs-join,javadocs-test-framework"/>
<!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs,javadocs-solr-core,check-javadocs-uptodate" unless="javadocs-uptodate-${name}">
@@ -309,6 +310,7 @@
<link offline="true" href="${lucene.javadoc.url}analyzers-common" packagelistloc="${lucenedocs}/analyzers-common"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-icu" packagelistloc="${lucenedocs}/analyzers-icu"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-kuromoji" packagelistloc="${lucenedocs}/analyzers-kuromoji"/>
+ <link offline="true" href="${lucene.javadoc.url}analyzers-nori" packagelistloc="${lucenedocs}/analyzers-nori"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-morfologik" packagelistloc="${lucenedocs}/analyzers-morfologik"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-phonetic" packagelistloc="${lucenedocs}/analyzers-phonetic"/>
<link offline="true" href="${lucene.javadoc.url}analyzers-smartcn" packagelistloc="${lucenedocs}/analyzers-smartcn"/>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema b/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema
index 1a04009..95c0c36 100644
--- a/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema
+++ b/solr/core/src/test-files/solr/configsets/_default/conf/managed-schema
@@ -849,6 +849,40 @@
</analyzer>
</fieldType>
+ <!-- Korean morphological analysis -->
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
+
+ This dictionary was built with MeCab; it defines a format for the features adapted
+ for the Korean language.
+
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ The tokenizer supports multiple schema attributes:
+ * userDictionary: User dictionary path.
+ * userDictionaryEncoding: User dictionary encoding.
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
+ -->
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <!-- Removes some parts of speech, such as EOMI (Pos.E); you can add a parameter 'tags',
+ listing the tags to remove. By default it removes:
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
+ This is basically an equivalent to stemming.
+ -->
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
<!-- Latvian -->
<dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/example/example-DIH/solr/db/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/db/conf/managed-schema b/solr/example/example-DIH/solr/db/conf/managed-schema
index ce41b1a..7da41de 100644
--- a/solr/example/example-DIH/solr/db/conf/managed-schema
+++ b/solr/example/example-DIH/solr/db/conf/managed-schema
@@ -996,6 +996,40 @@
</analyzer>
</fieldType>
+ <!-- Korean morphological analysis -->
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
+
+ This dictionary was built with MeCab; it defines a format for the features adapted
+ for the Korean language.
+
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ The tokenizer supports multiple schema attributes:
+ * userDictionary: User dictionary path.
+ * userDictionaryEncoding: User dictionary encoding.
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
+ -->
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <!-- Removes some parts of speech, such as EOMI (Pos.E); you can add a parameter 'tags',
+ listing the tags to remove. By default it removes:
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
+ This is basically an equivalent to stemming.
+ -->
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
<!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/example/example-DIH/solr/mail/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/mail/conf/managed-schema b/solr/example/example-DIH/solr/mail/conf/managed-schema
index 71b03a8..1a371d4 100644
--- a/solr/example/example-DIH/solr/mail/conf/managed-schema
+++ b/solr/example/example-DIH/solr/mail/conf/managed-schema
@@ -915,6 +915,40 @@
</analyzer>
</fieldType>
+ <!-- Korean morphological analysis -->
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
+
+ This dictionary was built with MeCab; it defines a format for the features adapted
+ for the Korean language.
+
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ The tokenizer supports multiple schema attributes:
+ * userDictionary: User dictionary path.
+ * userDictionaryEncoding: User dictionary encoding.
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
+ -->
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <!-- Removes some parts of speech, such as EOMI (Pos.E); you can add a parameter 'tags',
+ listing the tags to remove. By default it removes:
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
+ This is basically an equivalent to stemming.
+ -->
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
<!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/example/example-DIH/solr/solr/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/solr/conf/managed-schema b/solr/example/example-DIH/solr/solr/conf/managed-schema
index aa39ce5..5c360b9 100644
--- a/solr/example/example-DIH/solr/solr/conf/managed-schema
+++ b/solr/example/example-DIH/solr/solr/conf/managed-schema
@@ -996,6 +996,40 @@
</analyzer>
</fieldType>
+ <!-- Korean morphological analysis -->
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
+
+ This dictionary was built with MeCab; it defines a format for the features adapted
+ for the Korean language.
+
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ The tokenizer supports multiple schema attributes:
+ * userDictionary: User dictionary path.
+ * userDictionaryEncoding: User dictionary encoding.
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
+ -->
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <!-- Removes some parts of speech, such as EOMI (Pos.E); you can add a parameter 'tags',
+ listing the tags to remove. By default it removes:
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
+ This is basically an equivalent to stemming.
+ -->
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
<!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/example/files/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/example/files/conf/managed-schema b/solr/example/files/conf/managed-schema
index d9f4538..c022331 100644
--- a/solr/example/files/conf/managed-schema
+++ b/solr/example/files/conf/managed-schema
@@ -322,6 +322,14 @@
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
@@ -470,6 +478,7 @@
<dynamicField name="*_txt_id" type="text_id" indexed="true" stored="true"/>
<dynamicField name="*_txt_it" type="text_it" indexed="true" stored="true"/>
<dynamicField name="*_txt_ja" type="text_ja" indexed="true" stored="true"/>
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
<dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
<dynamicField name="*_txt_nl" type="text_nl" indexed="true" stored="true"/>
<dynamicField name="*_txt_no" type="text_no" indexed="true" stored="true"/>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/server/solr/configsets/_default/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/_default/conf/managed-schema b/solr/server/solr/configsets/_default/conf/managed-schema
index 1a04009..95c0c36 100644
--- a/solr/server/solr/configsets/_default/conf/managed-schema
+++ b/solr/server/solr/configsets/_default/conf/managed-schema
@@ -849,6 +849,40 @@
</analyzer>
</fieldType>
+ <!-- Korean morphological analysis -->
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
+
+ This dictionary was built with MeCab; it defines a format for the features adapted
+ for the Korean language.
+
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ The tokenizer supports multiple schema attributes:
+ * userDictionary: User dictionary path.
+ * userDictionaryEncoding: User dictionary encoding.
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
+ -->
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <!-- Removes some parts of speech, such as EOMI (Pos.E); you can add a parameter 'tags',
+ listing the tags to remove. By default it removes:
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
+ This is basically an equivalent to stemming.
+ -->
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
<!-- Latvian -->
<dynamicField name="*_txt_lv" type="text_lv" indexed="true" stored="true"/>
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/489a9157/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
index 5751806..b6d3d16 100644
--- a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
+++ b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema
@@ -1032,6 +1032,40 @@
</analyzer>
</fieldType>
+ <!-- Korean morphological analysis -->
+ <dynamicField name="*_txt_ko" type="text_ko" indexed="true" stored="true"/>
+ <fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <!-- Nori Korean morphological analyzer/tokenizer (KoreanTokenizer)
+ The Korean (nori) analyzer integrates Lucene nori analysis module into Solr.
+ It uses the mecab-ko-dic dictionary to perform morphological analysis of Korean texts.
+
+ This dictionary was built with MeCab; it defines a format for the features adapted
+ for the Korean language.
+
+ Nori also has a convenient user dictionary feature that allows overriding the statistical
+ model with your own entries for segmentation, part-of-speech tags and readings without a need
+ to specify weights. Notice that user dictionaries have not been subject to extensive testing.
+
+ The tokenizer supports multiple schema attributes:
+ * userDictionary: User dictionary path.
+ * userDictionaryEncoding: User dictionary encoding.
+ * decompoundMode: Decompound mode. Either 'none', 'discard', 'mixed'. Default is 'discard'.
+ * outputUnknownUnigrams: If true outputs unigrams for unknown words.
+ -->
+ <tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
+ <!-- Removes some parts of speech, such as EOMI (Pos.E); you can add a parameter 'tags',
+ listing the tags to remove. By default it removes:
+ E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
+ This is basically an equivalent to stemming.
+ -->
+ <filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
+ <!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
+ <filter class="solr.KoreanReadingFormFilterFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ </analyzer>
+ </fieldType>
+
<!-- Latvian -->
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer>