You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2020/02/21 09:24:13 UTC

[lucene-solr] branch master updated: LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new f8a2c39  LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.
f8a2c39 is described below

commit f8a2c3990686050ab42077e76724388910413881
Author: Dawid Weiss <dw...@apache.org>
AuthorDate: Fri Feb 21 10:24:05 2020 +0100

    LUCENE-9155: add missing naist dictionary generation, clean up the code a bit.
---
 gradle/generation/kuromoji.gradle | 56 ++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/gradle/generation/kuromoji.gradle b/gradle/generation/kuromoji.gradle
index 981fc0e..2f55c1a 100644
--- a/gradle/generation/kuromoji.gradle
+++ b/gradle/generation/kuromoji.gradle
@@ -17,6 +17,18 @@
 
 // This downloads and compiles Kuromoji dictionaries.
 
+def recompileDictionary(project, dictionaryName, Closure closure) {
+  project.javaexec {
+    main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
+    classpath = project.sourceSets.main.runtimeClasspath
+
+    jvmArgs '-Xmx1G'
+
+    with closure
+  }
+  project.logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
+}
+
 configure(project(":lucene:analysis:kuromoji")) {
   apply plugin: 'java-library'
   apply plugin: "de.undercouch.download"
@@ -25,10 +37,21 @@ configure(project(":lucene:analysis:kuromoji")) {
     targetDir = file("src/resources")
   }
 
-  task compileMecabIpadic(type: Download) {
-    description "Recompile mecab dictionaries."
+  task deleteDictionaryData() {
+    // There should really be just one resources directory, but since we
+    // don't know which one it'll be, let's process all of them.
+    doFirst {
+      sourceSets.main.resources.srcDirs.each { location ->
+        delete fileTree(dir: location, include: "org/apache/lucene/analysis/ja/dict/*.dat")
+      }
+    }
+  }
+
+  task compileMecab(type: Download) {
+    description "Recompile dictionaries from Mecab data."
     group "generation"
 
+    dependsOn deleteDictionaryData
     dependsOn sourceSets.main.runtimeClasspath
 
     def dictionaryName = "mecab-ipadic-2.7.0-20070801"
@@ -58,12 +81,7 @@ configure(project(":lucene:analysis:kuromoji")) {
       }
 
       // Compile the dictionary
-      project.javaexec {
-        main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
-        classpath = sourceSets.main.runtimeClasspath
-
-        jvmArgs '-Xmx1G'
-
+      recompileDictionary(project, dictionaryName, {
         args += [
             "ipadic",
             unpackedDir,
@@ -71,19 +89,15 @@ configure(project(":lucene:analysis:kuromoji")) {
             "euc-jp",
             false
         ]
-
-        logger.lifecycle("Automaton regenerated from dictionary: ${dictionaryName}")
-      }
+      })
     }
   }
 
-  /*
-  TODO: this currently doesn't work because DictionaryBuilder no longer supports this type?
-
   task compileNaist(type: Download) {
-    description "Recompile naist dictionaries."
+    description "Recompile dictionaries from Naist data."
     group "generation"
 
+    dependsOn deleteDictionaryData
     dependsOn sourceSets.main.runtimeClasspath
 
     def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013"
@@ -103,21 +117,15 @@ configure(project(":lucene:analysis:kuromoji")) {
       }
 
       // Compile the dictionary
-      project.javaexec {
-        main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
-        classpath = sourceSets.main.runtimeClasspath
-
-        jvmArgs '-Xmx1G'
-
+      recompileDictionary(project, dictionaryName, {
         args += [
-            "naist",
+            "ipadic",
             unpackedDir,
             targetDir,
             "euc-jp",
             false
         ]
-      }
+      })
     }
   }
-   */
 }