You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/04/02 09:46:51 UTC

[lucene] branch main updated: LUCENE-9900: Regenerate/ run ICU only if inputs changed (#61)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 010e3a1  LUCENE-9900: Regenerate/ run ICU only if inputs changed (#61)
010e3a1 is described below

commit 010e3a1ba960ea0dcd7e5092073d2a55a21b0ac9
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Fri Apr 2 11:46:43 2021 +0200

    LUCENE-9900: Regenerate/ run ICU only if inputs changed (#61)
---
 gradle/generation/icu.gradle                       |  31 ++++++++++++++-------
 gradle/generation/javacc.gradle                    |   6 ++--
 gradle/generation/jflex.gradle                     |  10 +++----
 gradle/generation/moman.gradle                     |   4 +--
 gradle/generation/regenerate.gradle                |  24 ++++++++++++----
 .../icu/src/generated/checksums/genRbbi.json       |   6 ++++
 .../src/generated/checksums/genUtr30DataFiles.json |  11 ++++++++
 .../org/apache/lucene/analysis/icu/utr30.nrm       | Bin 58720 -> 59200 bytes
 8 files changed, 67 insertions(+), 25 deletions(-)

diff --git a/gradle/generation/icu.gradle b/gradle/generation/icu.gradle
index d450d48..f7f2efc 100644
--- a/gradle/generation/icu.gradle
+++ b/gradle/generation/icu.gradle
@@ -43,12 +43,21 @@ configure(project(":lucene:analysis:icu")) {
     icupkg = file("${icuBinDir}/icupkg")
   }
 
+  def icuCompileTask = Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
+
   task genUtr30DataFiles() {
-    dependsOn Os.isFamily(Os.FAMILY_WINDOWS) ? "compileIcuWindows" : "compileIcuLinux"
+    dependsOn icuCompileTask
 
     // May be undefined yet, so use a provider.
     dependsOn { sourceSets.tools.runtimeClasspath }
 
+    // gennorm generates file order-dependent output, so make it constant here.
+    def inputFiles = fileTree(dir: utr30DataDir, include: "*.txt").asList().toSorted(Comparator.comparing { File f -> f.name })
+    def outputFile = file("src/resources/org/apache/lucene/analysis/icu/utr30.nrm")
+
+    inputs.files inputFiles
+    outputs.file outputFile
+
     doFirst {
       // all these steps must be done sequentially: it's a pipeline resulting in utr30.nrm
       def v = getVersion('com.ibm.icu', 'icu4j');
@@ -71,9 +80,7 @@ configure(project(":lucene:analysis:icu")) {
             utr30DataDir,
             "-o",
             "${buildDir}/utr30.tmp",
-            "nfc.txt", "nfkc.txt", "nfkc_cf.txt", "BasicFoldings.txt",
-            "DiacriticFolding.txt", "DingbatFolding.txt", "HanRadicalFolding.txt",
-            "NativeDigitFolding.txt"
+            *(inputFiles.collect { it.name })
         ]
       }
 
@@ -82,7 +89,7 @@ configure(project(":lucene:analysis:icu")) {
         args = [
             "-tb",
             "${buildDir}/utr30.tmp",
-            "src/resources/org/apache/lucene/analysis/icu/utr30.nrm"
+            outputFile
         ]
       }
     }
@@ -92,6 +99,12 @@ configure(project(":lucene:analysis:icu")) {
     // May be undefined yet, so use a provider.
     dependsOn { sourceSets.tools.runtimeClasspath }
 
+    def sourceDir = file("src/data/uax29")
+    def targetDir = file("src/resources/org/apache/lucene/analysis/icu/segmentation")
+
+    inputs.files fileTree(dir: sourceDir, include: "*.rbbi")
+    outputs.files fileTree(dir: targetDir, include: "*.brk")
+
     doFirst {
       project.javaexec {
         main = "org.apache.lucene.analysis.icu.RBBIRuleCompiler"
@@ -99,15 +112,13 @@ configure(project(":lucene:analysis:icu")) {
 
         ignoreExitValue false
         enableAssertions true
-        args = [
-            "src/data/uax29",
-            "src/resources/org/apache/lucene/analysis/icu/segmentation"
-        ]
+        args = [ sourceDir, targetDir ]
       }
     }
   }
 
-  regenerate.dependsOn genUtr30DataFiles, genRbbi
+  regenerate.dependsOn wrapWithPersistentChecksums(genUtr30DataFiles, [ ignoreWithSource: icuCompileTask ])
+  regenerate.dependsOn wrapWithPersistentChecksums(genRbbi)
 
   task compileIcuWindows() {
     doFirst {
diff --git a/gradle/generation/javacc.gradle b/gradle/generation/javacc.gradle
index 177975e..f527034 100644
--- a/gradle/generation/javacc.gradle
+++ b/gradle/generation/javacc.gradle
@@ -252,9 +252,9 @@ configure(project(":lucene:queryparser")) {
     description "Regenerate query parsers (javacc syntax definitions)."
     group "generation"
 
-    dependsOn wrapWithPersistentChecksums(javaccParserClassic, "spotlessApply"),
-        wrapWithPersistentChecksums(javaccParserSurround, "spotlessApply"),
-        wrapWithPersistentChecksums(javaccParserFlexible, "spotlessApply")
+    dependsOn wrapWithPersistentChecksums(javaccParserClassic, [ andThenTasks: "spotlessApply" ]),
+        wrapWithPersistentChecksums(javaccParserSurround, [ andThenTasks: "spotlessApply" ]),
+        wrapWithPersistentChecksums(javaccParserFlexible, [ andThenTasks: "spotlessApply" ])
   }
 
   regenerate.dependsOn javacc
diff --git a/gradle/generation/jflex.gradle b/gradle/generation/jflex.gradle
index 57c7801..d0810b7 100644
--- a/gradle/generation/jflex.gradle
+++ b/gradle/generation/jflex.gradle
@@ -49,7 +49,7 @@ configure(project(":lucene:core")) {
     }
   }
 
-  regenerate.dependsOn wrapWithPersistentChecksums(generateStandardTokenizer, "spotlessApply")
+  regenerate.dependsOn wrapWithPersistentChecksums(generateStandardTokenizer, [ andThenTasks: "spotlessApply" ])
 }
 
 configure(project(":lucene:analysis:common")) {
@@ -118,10 +118,10 @@ configure(project(":lucene:analysis:common")) {
     }
   }
 
-  regenerate.dependsOn wrapWithPersistentChecksums(generateWikipediaTokenizer, "spotlessApply"),
-      wrapWithPersistentChecksums(generateClassicTokenizer, "spotlessApply"),
-      wrapWithPersistentChecksums(generateUAX29URLEmailTokenizer, "spotlessApply"),
-      wrapWithPersistentChecksums(generateHTMLStripCharFilter, "spotlessApply")
+  regenerate.dependsOn wrapWithPersistentChecksums(generateWikipediaTokenizer, [ andThenTasks: "spotlessApply" ]),
+      wrapWithPersistentChecksums(generateClassicTokenizer, [ andThenTasks: "spotlessApply" ]),
+      wrapWithPersistentChecksums(generateUAX29URLEmailTokenizer, [ andThenTasks: "spotlessApply" ]),
+      wrapWithPersistentChecksums(generateHTMLStripCharFilter, [ andThenTasks: "spotlessApply" ])
 }
 
 class JFlexTask extends DefaultTask {
diff --git a/gradle/generation/moman.gradle b/gradle/generation/moman.gradle
index 32a0e0d..39b6e87 100644
--- a/gradle/generation/moman.gradle
+++ b/gradle/generation/moman.gradle
@@ -99,8 +99,8 @@ configure(project(":lucene:core")) {
     description "Regenerate Moman-based sources."
     group "generation"
 
-    dependsOn wrapWithPersistentChecksums(utilGenPacked, "spotlessApply")
-    dependsOn wrapWithPersistentChecksums(utilGenLev, "spotlessApply")
+    dependsOn wrapWithPersistentChecksums(utilGenPacked, [ andThenTasks: "spotlessApply" ])
+    dependsOn wrapWithPersistentChecksums(utilGenLev, [ andThenTasks: "spotlessApply" ])
   }
 
   regenerate.dependsOn moman
diff --git a/gradle/generation/regenerate.gradle b/gradle/generation/regenerate.gradle
index 920c40c..2f7b8b5 100644
--- a/gradle/generation/regenerate.gradle
+++ b/gradle/generation/regenerate.gradle
@@ -56,9 +56,23 @@ configure([
       // constraints to each node in the execution graph but not node-and-dependencies).
       //
       // sourceTask - the task to wrap
-      // otherTasks - other tasks that should be scheduled to run after source task and
-      //   before checksum calculation.
-      wrapWithPersistentChecksums = { Task sourceTask, Object... otherTasks ->
+      // extraConfig - a map with extra (optional) configuration options.
+      //   andThenTasks: other tasks that should be scheduled to run after source task and
+      //     before checksum calculation.
+      wrapWithPersistentChecksums = { Task sourceTask, Map<String, Object> extraConfig = [:] ->
+        def toList = { value ->
+          if (value instanceof List) {
+            return value
+          } else if (value == null) {
+            return []
+          } else {
+            return [ value ]
+          }
+        }
+
+        List<Object> andThenTasks = toList(extraConfig.get("andThenTasks"))
+        List<Object> ignoreWithSource =  toList(extraConfig.get("ignoreWithSource"))
+
         // Create checksum-loader task.
         Task checksumLoadTask = tasks.create("${sourceTask.name}ChecksumLoad", {
           ext {
@@ -140,7 +154,7 @@ configure([
           def deps = [
               checksumLoadTask,
               sourceTask,
-              *otherTasks,
+              *andThenTasks,
               checksumSaveTask
           ].flatten()
 
@@ -163,7 +177,7 @@ configure([
         // Set conditional execution only if checksum mismatch occurred.
         if (!gradle.startParameter.isRerunTasks()) {
           project.afterEvaluate {
-            resolveTaskRefs([sourceTask, checksumSaveTask]).each { t ->
+            resolveTaskRefs([sourceTask, *ignoreWithSource, checksumSaveTask]).each { t ->
               t.configure {
                 logger.info("Making " + t.name + " run only if " + checksumLoadTask.name + " indicates changes")
                 onlyIf { !checksumLoadTask.checksumMatch }
diff --git a/lucene/analysis/icu/src/generated/checksums/genRbbi.json b/lucene/analysis/icu/src/generated/checksums/genRbbi.json
new file mode 100644
index 0000000..7607c4e
--- /dev/null
+++ b/lucene/analysis/icu/src/generated/checksums/genRbbi.json
@@ -0,0 +1,6 @@
+{
+    "lucene/analysis/icu/src/data/uax29/Default.rbbi": "71bfaee5e81ac272aff828d1e44d0612be1b8363",
+    "lucene/analysis/icu/src/data/uax29/MyanmarSyllable.rbbi": "4c6817658b454add5ec1f9ac8c0015ce8eb3b5f2",
+    "lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk": "1b9013b7ef4ba32a851a330c58a8fa820b9dda79",
+    "lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk": "cc023ec17e0148518086098691785a32b88ee09a"
+}
\ No newline at end of file
diff --git a/lucene/analysis/icu/src/generated/checksums/genUtr30DataFiles.json b/lucene/analysis/icu/src/generated/checksums/genUtr30DataFiles.json
new file mode 100644
index 0000000..33a412f
--- /dev/null
+++ b/lucene/analysis/icu/src/generated/checksums/genUtr30DataFiles.json
@@ -0,0 +1,11 @@
+{
+    "lucene/analysis/icu/src/data/utr30/BasicFoldings.txt": "fc5badae307c5b740f8dd81407e4be04e3e2fde6",
+    "lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt": "2d48c24758ed9322b49eee84c23c74b7f11337c6",
+    "lucene/analysis/icu/src/data/utr30/DingbatFolding.txt": "ca562b2029e208bdf07cb2e5f689e509a336265e",
+    "lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt": "f5848aa38cbf1a11131d59384f31477f3b7f077b",
+    "lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt": "434ad7eb17fc32a441385795e4a4aa7e589ec30a",
+    "lucene/analysis/icu/src/data/utr30/nfc.txt": "ec95d7f7e5910791717234dd09efc4b13dc32d35",
+    "lucene/analysis/icu/src/data/utr30/nfkc.txt": "b8e91bc64e354af505d51f6072a43c90c6b4d1b5",
+    "lucene/analysis/icu/src/data/utr30/nfkc_cf.txt": "22d90ea4a7771e6ddebaaeb9438e98ce625e16f5",
+    "lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm": "32a6a3198039883c93f9ebef31fe24c1029f2b07"
+}
\ No newline at end of file
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
index 92a6919..6409b87 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ