You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by na...@apache.org on 2019/08/08 15:40:26 UTC
[lucene-solr] branch branch_8x updated: LUCENE-8912: remove nori/tools dependency on ICU
This is an automated email from the ASF dual-hosted git repository.
namgyu pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 2cabbf8 LUCENE-8912: remove nori/tools dependency on ICU
2cabbf8 is described below
commit 2cabbf81524fc3e94e53a7a3f00c7419d484c838
Author: Namgyu Kim <na...@apache.org>
AuthorDate: Fri Aug 9 00:40:03 2019 +0900
LUCENE-8912: remove nori/tools dependency on ICU
---
lucene/analysis/nori/build.xml | 14 +-------------
.../analysis/ko/util/TokenInfoDictionaryBuilder.java | 12 +++++-------
2 files changed, 6 insertions(+), 20 deletions(-)
diff --git a/lucene/analysis/nori/build.xml b/lucene/analysis/nori/build.xml
index 0938de2..6b82816 100644
--- a/lucene/analysis/nori/build.xml
+++ b/lucene/analysis/nori/build.xml
@@ -57,13 +57,8 @@
<untar src="${build.dir}/${dict.version}.tar" dest="${build.dir}"/>
</target>
- <path id="tools.dependencies">
- <fileset dir="../icu/lib"/>
- </path>
-
<path id="tools.classpath">
<path refid="classpath"/>
- <path refid="tools.dependencies"/>
<pathelement location="${build.dir}/classes/java"/>
<pathelement location="${build.dir}/classes/tools"/>
</path>
@@ -95,14 +90,7 @@
</sequential>
</target>
- <!-- we don't actually need to compile this thing, we just want its lib -->
- <target name="resolve-icu">
- <ant dir="../icu/" target="resolve" inheritAll="false">
- <propertyset refid="uptodate.and.compiled.properties"/>
- </ant>
- </target>
-
- <target name="compile-tools" depends="resolve-icu, compile-core, common.compile-tools">
+ <target name="compile-tools" depends="compile-core, common.compile-tools">
<compile
srcdir="src/tools/java"
destdir="${build.dir}/classes/tools">
diff --git a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
index 6609f50..27c72da 100644
--- a/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/nori/src/tools/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryBuilder.java
@@ -25,6 +25,7 @@ import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
+import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -35,7 +36,6 @@ import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
-import com.ibm.icu.text.Normalizer2;
import org.apache.lucene.util.fst.PositiveIntOutputs;
public class TokenInfoDictionaryBuilder {
@@ -45,13 +45,11 @@ public class TokenInfoDictionaryBuilder {
private String encoding = "utf-8";
- private boolean normalizeEntries = false;
- private Normalizer2 normalizer;
+ private Normalizer.Form normalForm;
public TokenInfoDictionaryBuilder(String encoding, boolean normalizeEntries) {
this.encoding = encoding;
- this.normalizeEntries = normalizeEntries;
- this.normalizer = normalizeEntries ? Normalizer2.getInstance(null, "nfkc", Normalizer2.Mode.COMPOSE) : null;
+ this.normalForm = normalizeEntries ? Normalizer.Form.NFKC : null;
}
public TokenInfoDictionaryWriter build(String dirname) throws IOException {
@@ -88,10 +86,10 @@ public class TokenInfoDictionaryBuilder {
}
// NFKC normalize dictionary entry
- if (normalizeEntries) {
+ if (normalForm != null) {
String[] normalizedEntry = new String[entry.length];
for (int i = 0; i < entry.length; i++) {
- normalizedEntry[i] = normalizer.normalize(entry[i]);
+ normalizedEntry[i] = Normalizer.normalize(entry[i], normalForm);
}
lines.add(normalizedEntry);
} else {