You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2020/02/20 18:01:07 UTC
[lucene-solr] branch master updated: LUCENE-9155: Port Kuromoji
dictionary compilation (regenerate).
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 62662e4 LUCENE-9155: Port Kuromoji dictionary compilation (regenerate).
62662e4 is described below
commit 62662e477afb8a01ff3061115add660d465ae62f
Author: Dawid Weiss <dw...@apache.org>
AuthorDate: Thu Feb 20 19:00:56 2020 +0100
LUCENE-9155: Port Kuromoji dictionary compilation (regenerate).
---
build.gradle | 1 +
gradle/generation/kuromoji.gradle | 107 ++++++++++++++++++++++++++++++++++++++
2 files changed, 108 insertions(+)
diff --git a/build.gradle b/build.gradle
index cff33c7..1bbf1a2 100644
--- a/build.gradle
+++ b/build.gradle
@@ -93,6 +93,7 @@ apply from: file('gradle/generation/jflex.gradle')
apply from: file('gradle/generation/javacc.gradle')
apply from: file('gradle/generation/util.gradle')
apply from: file('gradle/generation/snowball.gradle')
+apply from: file('gradle/generation/kuromoji.gradle')
// Additional development aids.
apply from: file('gradle/maven/maven-local.gradle')
diff --git a/gradle/generation/kuromoji.gradle b/gradle/generation/kuromoji.gradle
new file mode 100644
index 0000000..ad1aba7
--- /dev/null
+++ b/gradle/generation/kuromoji.gradle
@@ -0,0 +1,107 @@
+
+// This downloads and compiles Kuromoji dictionaries.
+
+configure(project(":lucene:analysis:kuromoji")) {
+ apply plugin: 'java-library'
+ apply plugin: "de.undercouch.download"
+
+ // Target directory handed to the DictionaryBuilder invocations in this script.
+ ext.targetDir = file("src/resources")
+
+ // Downloads the mecab-ipadic source archive, unpacks it, applies a local
+ // patch and runs DictionaryBuilder over the result, writing into targetDir.
+ //
+ // Requires a "git" executable on the PATH (used only to apply the patch).
+ task compileMecabIpadic(type: Download) {
+   description "Recompile mecab dictionaries."
+   group "generation"
+
+   // DictionaryBuilder is launched from the module's own runtime classpath,
+   // so that classpath has to be built before this task runs.
+   dependsOn sourceSets.main.runtimeClasspath
+
+   def dictionaryName = "mecab-ipadic-2.7.0-20070801"
+   def dictionarySource = "https://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/${dictionaryName}.tar.gz"
+   def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
+   def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
+
+   // Download task configuration: fetch the archive into the build directory.
+   src dictionarySource
+   dest dictionaryFile
+   onlyIfModified true
+
+   doLast {
+     // Unpack the downloaded archive into a clean directory.
+     delete unpackedDir
+     ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") {
+       // Drop the archive's single top-level directory from extracted paths.
+       ant.cutdirsmapper(dirs: "1")
+     }
+
+     // Apply patch via local git.
+     project.exec {
+       workingDir = unpackedDir
+       executable "git"
+       args += [
+         "apply",
+         file("src/tools/patches/Noun.proper.csv.patch").absolutePath
+       ]
+     }
+
+     // Compile the dictionary
+     project.javaexec {
+       main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
+       classpath = sourceSets.main.runtimeClasspath
+
+       jvmArgs '-Xmx1G'
+
+       // NOTE(review): argument meanings are defined by DictionaryBuilder,
+       // which is not visible here; the trailing "false" in particular
+       // should be confirmed against that class.
+       args += [
+         "ipadic",
+         unpackedDir,
+         targetDir,
+         "euc-jp",
+         false
+       ]
+     }
+
+     // Log only after javaexec has returned. The closure above merely
+     // configures the process, so logging from inside it would announce
+     // success before the compilation ran (and even if it then failed).
+     logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
+   }
+ }
+
+ /*
+ TODO: this task is disabled because it currently doesn't work; DictionaryBuilder no longer seems to support the "naist" dictionary type.
+
+ task compileNaist(type: Download) {
+ description "Recompile naist dictionaries."
+ group "generation"
+
+ dependsOn sourceSets.main.runtimeClasspath
+
+ def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013"
+ def dictionarySource = "https://rwthaachen.dl.osdn.jp/naist-jdic/53500/${dictionaryName}.tar.gz"
+ def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
+ def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
+
+ src dictionarySource
+ dest dictionaryFile
+ onlyIfModified true
+
+ doLast {
+ // Unpack the downloaded archive.
+ delete unpackedDir
+ ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") {
+ ant.cutdirsmapper(dirs: "1")
+ }
+
+ // Compile the dictionary
+ project.javaexec {
+ main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
+ classpath = sourceSets.main.runtimeClasspath
+
+ jvmArgs '-Xmx1G'
+
+ args += [
+ "naist",
+ unpackedDir,
+ targetDir,
+ "euc-jp",
+ false
+ ]
+ }
+ }
+ }
+ */
+}