You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2020/02/21 09:24:13 UTC
[lucene-solr] branch master updated: LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new f8a2c39 LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.
f8a2c39 is described below
commit f8a2c3990686050ab42077e76724388910413881
Author: Dawid Weiss <dw...@apache.org>
AuthorDate: Fri Feb 21 10:24:05 2020 +0100
LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.
---
gradle/generation/kuromoji.gradle | 56 ++++++++++++++++++++++-----------------
1 file changed, 32 insertions(+), 24 deletions(-)
diff --git a/gradle/generation/kuromoji.gradle b/gradle/generation/kuromoji.gradle
index 981fc0e..2f55c1a 100644
--- a/gradle/generation/kuromoji.gradle
+++ b/gradle/generation/kuromoji.gradle
@@ -17,6 +17,18 @@
// This downloads and compiles Kuromoji dictionaries.
+def recompileDictionary(project, dictionaryName, Closure closure) {
+ project.javaexec {
+ main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
+ classpath = project.sourceSets.main.runtimeClasspath
+
+ jvmArgs '-Xmx1G'
+
+ with closure
+ }
+ project.logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
+}
+
configure(project(":lucene:analysis:kuromoji")) {
apply plugin: 'java-library'
apply plugin: "de.undercouch.download"
@@ -25,10 +37,21 @@ configure(project(":lucene:analysis:kuromoji")) {
targetDir = file("src/resources")
}
- task compileMecabIpadic(type: Download) {
- description "Recompile mecab dictionaries."
+ task deleteDictionaryData() {
+ // There should really be just one but since we don't know which
+ // one it'll be, let's process all of them.
+ doFirst {
+ sourceSets.main.resources.srcDirs.each { location ->
+ delete fileTree(dir: location, include: "org/apache/lucene/analysis/ja/dict/*.dat")
+ }
+ }
+ }
+
+ task compileMecab(type: Download) {
+ description "Recompile dictionaries from Mecab data."
group "generation"
+ dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath
def dictionaryName = "mecab-ipadic-2.7.0-20070801"
@@ -58,12 +81,7 @@ configure(project(":lucene:analysis:kuromoji")) {
}
// Compile the dictionary
- project.javaexec {
- main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
- classpath = sourceSets.main.runtimeClasspath
-
- jvmArgs '-Xmx1G'
-
+ recompileDictionary(project, dictionaryName, {
args += [
"ipadic",
unpackedDir,
@@ -71,19 +89,15 @@ configure(project(":lucene:analysis:kuromoji")) {
"euc-jp",
false
]
-
- logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
- }
+ })
}
}
- /*
- TODO: this currently doesn't work because DictionaryBuilder no longer supports this type?
-
task compileNaist(type: Download) {
- description "Recompile naist dictionaries."
+ description "Recompile dictionaries from Naist data."
group "generation"
+ dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath
def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013"
@@ -103,21 +117,15 @@ configure(project(":lucene:analysis:kuromoji")) {
}
// Compile the dictionary
- project.javaexec {
- main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
- classpath = sourceSets.main.runtimeClasspath
-
- jvmArgs '-Xmx1G'
-
+ recompileDictionary(project, dictionaryName, {
args += [
- "naist",
+ "ipadic",
unpackedDir,
targetDir,
"euc-jp",
false
]
- }
+ })
}
}
- */
}