You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/04/02 07:56:55 UTC
[lucene] branch main updated: LUCENE-9872: Make the most painful
tasks in regenerate fully incremental (#60)
This is an automated email from the ASF dual-hosted git repository.
dweiss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new e3ae57a LUCENE-9872: Make the most painful tasks in regenerate fully incremental (#60)
e3ae57a is described below
commit e3ae57a3c1ac59a1b56a36d965000f45abbc43e5
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Fri Apr 2 09:56:47 2021 +0200
LUCENE-9872: Make the most painful tasks in regenerate fully incremental (#60)
---
build.gradle | 2 +-
buildSrc/build.gradle | 15 +
gradle/generation/javacc.gradle | 30 +-
gradle/generation/jflex.gradle | 157 +++++-----
.../generation/jflex/skeleton.default.txt | 0
.../jflex/skeleton.disable.buffer.expansion.txt | 0
gradle/generation/{util.gradle => moman.gradle} | 37 ++-
.../generation/moman}/createLevAutomata.py | 0
gradle/generation/moman/gen_BulkOperation.py | 337 +++++++++++++++++++++
gradle/generation/moman/gen_Packed64SingleBlock.py | 298 ++++++++++++++++++
gradle/generation/regenerate.gradle | 160 ++++++++--
gradle/generation/snowball.gradle | 2 +-
gradle/globals.gradle | 30 ++
gradle/publishing/distribution.gradle | 11 -
gradle/validation/jar-checks.gradle | 11 -
gradle/validation/precommit.gradle | 12 +-
lucene/analysis/common/checksums.properties | 2 +
.../checksums/generateClassicTokenizer.json | 5 +
.../checksums/generateHTMLStripCharFilter.json | 5 +
.../checksums/generateUAX29URLEmailTokenizer.json | 5 +
.../checksums/generateWikipediaTokenizer.json | 5 +
.../checksums/generateStandardTokenizer.json | 5 +
.../core/src/generated/checksums/utilGenLev.json | 6 +
.../src/generated/checksums/utilGenPacked.json | 30 ++
.../generated/checksums/javaccParserClassic.json | 9 +
.../generated/checksums/javaccParserFlexible.json | 9 +
.../generated/checksums/javaccParserSurround.json | 9 +
27 files changed, 1047 insertions(+), 145 deletions(-)
diff --git a/build.gradle b/build.gradle
index 648c419..11747b7 100644
--- a/build.gradle
+++ b/build.gradle
@@ -146,7 +146,7 @@ apply from: file('gradle/validation/spotless.gradle')
// Source or data regeneration tasks
apply from: file('gradle/generation/regenerate.gradle')
apply from: file('gradle/generation/jflex.gradle')
-apply from: file('gradle/generation/util.gradle')
+apply from: file('gradle/generation/moman.gradle')
apply from: file('gradle/generation/snowball.gradle')
apply from: file('gradle/generation/kuromoji.gradle')
apply from: file('gradle/generation/nori.gradle')
diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle
index 63e733b..e1d90c4 100644
--- a/buildSrc/build.gradle
+++ b/buildSrc/build.gradle
@@ -19,7 +19,22 @@
// Make sure the build environment is consistent.
apply from: file('../gradle/validation/check-environment.gradle')
+repositories {
+ mavenCentral()
+}
+
+ext {
+ // Declare script dependency versions outside of palantir's
+ // version unification control. These are not our main dependencies.
+ scriptDepVersions = [
+ "commons-codec": "1.13"
+ ]
+}
+
dependencies {
implementation gradleApi()
implementation localGroovy()
+
+ implementation "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
}
+
diff --git a/gradle/generation/javacc.gradle b/gradle/generation/javacc.gradle
index 1294ef2..177975e 100644
--- a/gradle/generation/javacc.gradle
+++ b/gradle/generation/javacc.gradle
@@ -252,27 +252,41 @@ configure(project(":lucene:queryparser")) {
description "Regenerate query parsers (javacc syntax definitions)."
group "generation"
- dependsOn javaccParserClassic
- dependsOn javaccParserSurround
- dependsOn javaccParserFlexible
+ dependsOn wrapWithPersistentChecksums(javaccParserClassic, "spotlessApply"),
+ wrapWithPersistentChecksums(javaccParserSurround, "spotlessApply"),
+ wrapWithPersistentChecksums(javaccParserFlexible, "spotlessApply")
}
- regenerate.dependsOn javacc, tidy
+ regenerate.dependsOn javacc
}
// We always regenerate, no need to declare outputs.
class JavaCCTask extends DefaultTask {
- @Input
+ @InputFile
File javaccFile
/**
* Apply closures to all generated files before they're copied back
* to mainline code.
*/
- @Optional
- @Input
List<Closure<FileTree>> afterGenerate = new ArrayList<>()
+ @OutputFiles
+ List<File> getGeneratedSources() {
+ // Return the list of generated files.
+ def baseDir = javaccFile.parentFile
+ def baseName = javaccFile.name.replace(".jj", "")
+
+ return [
+ project.file("${baseDir}/${baseName}.java"),
+ project.file("${baseDir}/${baseName}Constants.java"),
+ project.file("${baseDir}/${baseName}TokenManager.java"),
+ project.file("${baseDir}/ParseException.java"),
+ project.file("${baseDir}/Token.java"),
+ project.file("${baseDir}/TokenMgrError.java")
+ ]
+ }
+
JavaCCTask() {
dependsOn(project.rootProject.configurations.javacc)
}
@@ -290,7 +304,7 @@ class JavaCCTask extends DefaultTask {
project.delete project.fileTree(tempDir, { include: "**/*.java" })
def targetDir = javaccFile.parentFile
- logger.lifecycle("Regenerating JavaCC:\n from: ${javaccFile}\n to: ${targetDir}")
+ logger.lifecycle("Recompiling JavaCC: ${project.rootDir.relativePath(javaccFile)}")
def output = new ByteArrayOutputStream()
def result = project.javaexec {
diff --git a/gradle/generation/jflex.gradle b/gradle/generation/jflex.gradle
index 3e9a949..57c7801 100644
--- a/gradle/generation/jflex.gradle
+++ b/gradle/generation/jflex.gradle
@@ -15,7 +15,6 @@
* limitations under the License.
*/
-
// Add a top-level pseudo-task to which we will attach individual regenerate tasks.
configure(rootProject) {
@@ -28,71 +27,17 @@ configure(rootProject) {
}
}
-// We always regenerate, no need to declare outputs.
-class JFlexTask extends DefaultTask {
- @Input
- File jflexFile
-
- @Input
- File skeleton
-
- @Optional
- String heapSize
-
- JFlexTask() {
- dependsOn(project.rootProject.configurations.jflex)
- }
-
- @TaskAction
- def generate() {
- if (!jflexFile || !jflexFile.exists()) {
- throw new RuntimeException("JFlex file does not exist: ${jflexFile}")
- }
- def targetDir = jflexFile.parentFile
- def target = jflexFile.absolutePath.replace(".jflex", ".java")
-
- logger.lifecycle("Regenerating JFlex:\n from: ${jflexFile}\n to: ${target}")
- project.javaexec {
- classpath {
- project.rootProject.configurations.jflex
- }
- main = "jflex.Main"
- args += [
- "-nobak",
- "--quiet",
- "--encoding", "UTF-8",
- ]
-
- if (heapSize) {
- maxHeapSize = heapSize
- }
-
- if (skeleton) {
- args += ["--skel", skeleton.absolutePath]
- }
-
- args += [
- "-d", targetDir.absolutePath,
- jflexFile
- ]
- }
-
- // Correct line endings for Windows.
- project.ant.fixcrlf(
- file: target,
- encoding: "UTF-8",
- eol: "lf"
- )
- }
-}
+def resources = scriptResources(buildscript)
+def skeletonDefault = file("${resources}/skeleton.default.txt")
+def skeletonNoBufferExpansion = file("${resources}/skeleton.disable.buffer.expansion.txt")
configure(project(":lucene:core")) {
- task jflexStandardTokenizerImpl(type: JFlexTask) {
+ task generateStandardTokenizer(type: JFlexTask) {
description "Regenerate StandardTokenizerImpl.java"
group "generation"
jflexFile = file('src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex')
- skeleton = file("src/data/jflex/skeleton.disable.buffer.expansion.txt")
+ skeleton = skeletonNoBufferExpansion
doLast {
ant.replace(
@@ -104,33 +49,32 @@ configure(project(":lucene:core")) {
}
}
- regenerate.dependsOn jflexStandardTokenizerImpl, "tidy"
+ regenerate.dependsOn wrapWithPersistentChecksums(generateStandardTokenizer, "spotlessApply")
}
configure(project(":lucene:analysis:common")) {
-
- task jflexWikipediaTokenizerImpl(type: JFlexTask) {
+ task generateWikipediaTokenizer(type: JFlexTask) {
description "Regenerate WikipediaTokenizerImpl.java"
group "generation"
jflexFile = file('src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex')
- skeleton = project(":lucene:core").file("src/data/jflex/skeleton.default")
+ skeleton = skeletonDefault
}
- task jflexClassicTokenizerImpl(type: JFlexTask) {
+ task generateClassicTokenizer(type: JFlexTask) {
description "Regenerate ClassicTokenizerImpl.java"
group "generation"
jflexFile = file('src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex')
- skeleton = project(":lucene:core").file("src/data/jflex/skeleton.default")
+ skeleton = skeletonDefault
}
- task jflexUAX29URLEmailTokenizerImpl(type: JFlexTask) {
+ task generateUAX29URLEmailTokenizer(type: JFlexTask) {
description "Regenerate UAX29URLEmailTokenizerImpl.java"
group "generation"
jflexFile = file('src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.jflex')
- skeleton = project(":lucene:core").file("src/data/jflex/skeleton.disable.buffer.expansion.txt")
+ skeleton = skeletonNoBufferExpansion
heapSize = "12g"
doFirst {
@@ -147,12 +91,12 @@ configure(project(":lucene:analysis:common")) {
}
}
- task jflexHTMLStripCharFilter(type: JFlexTask) {
+ task generateHTMLStripCharFilter(type: JFlexTask) {
description "Regenerate HTMLStripCharFilter.java"
group "generation"
jflexFile = file('src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex')
- skeleton = project(":lucene:core").file("src/data/jflex/skeleton.default")
+ skeleton = skeletonDefault
doFirst {
// Regenerate HTMLCharacterEntities.jflex first.
@@ -174,9 +118,72 @@ configure(project(":lucene:analysis:common")) {
}
}
- regenerate.dependsOn jflexUAX29URLEmailTokenizerImpl,
- jflexHTMLStripCharFilter,
- jflexClassicTokenizerImpl,
- jflexWikipediaTokenizerImpl,
- "tidy"
+ regenerate.dependsOn wrapWithPersistentChecksums(generateWikipediaTokenizer, "spotlessApply"),
+ wrapWithPersistentChecksums(generateClassicTokenizer, "spotlessApply"),
+ wrapWithPersistentChecksums(generateUAX29URLEmailTokenizer, "spotlessApply"),
+ wrapWithPersistentChecksums(generateHTMLStripCharFilter, "spotlessApply")
+}
+
+class JFlexTask extends DefaultTask {
+ @InputFile
+ File jflexFile
+
+ @InputFile
+ File skeleton
+
+ @Optional
+ String heapSize
+
+ @OutputFile
+ File getGeneratedFile() {
+ return project.file(jflexFile.absolutePath.replace(".jflex", ".java"))
+ }
+
+ JFlexTask() {
+ dependsOn(project.rootProject.configurations.jflex)
+ }
+
+ @TaskAction
+ def generate() {
+ if (!jflexFile || !jflexFile.exists()) {
+ throw new GradleException("JFlex file does not exist: ${jflexFile}")
+ }
+
+ def target = project.file(jflexFile.absolutePath.replace(".jflex", ".java"))
+
+ logger.lifecycle("Recompiling JFlex: ${project.rootDir.relativePath(jflexFile)}")
+
+ project.javaexec {
+ classpath {
+ project.rootProject.configurations.jflex
+ }
+
+ main = "jflex.Main"
+ args += [
+ "-nobak",
+ "--quiet",
+ "--encoding", "UTF-8",
+ ]
+
+ if (heapSize) {
+ maxHeapSize = heapSize
+ }
+
+ if (skeleton) {
+ args += ["--skel", skeleton.absolutePath]
+ }
+
+ args += [
+ "-d", target.parentFile.absolutePath,
+ jflexFile
+ ]
+ }
+
+ // Correct line endings for Windows.
+ project.ant.fixcrlf(
+ file: target,
+ encoding: "UTF-8",
+ eol: "lf"
+ )
+ }
}
diff --git a/lucene/core/src/data/jflex/skeleton.default b/gradle/generation/jflex/skeleton.default.txt
similarity index 100%
rename from lucene/core/src/data/jflex/skeleton.default
rename to gradle/generation/jflex/skeleton.default.txt
diff --git a/lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt b/gradle/generation/jflex/skeleton.disable.buffer.expansion.txt
similarity index 100%
rename from lucene/core/src/data/jflex/skeleton.disable.buffer.expansion.txt
rename to gradle/generation/jflex/skeleton.disable.buffer.expansion.txt
diff --git a/gradle/generation/util.gradle b/gradle/generation/moman.gradle
similarity index 67%
rename from gradle/generation/util.gradle
rename to gradle/generation/moman.gradle
index e672ecc..32a0e0d 100644
--- a/gradle/generation/util.gradle
+++ b/gradle/generation/moman.gradle
@@ -16,15 +16,18 @@
*/
apply plugin: "de.undercouch.download"
+def resources = scriptResources(buildscript)
+
configure(project(":lucene:core")) {
ext {
+ momanSource = "https://github.com/jpbarrette/moman/archive/497c90e34e412b6494db6dabf0d95db8034bd325.zip"
momanDir = file("${buildDir}/moman")
}
task installMoman(type: Download) {
def momanZip = file("${momanDir}/moman.zip")
- src "https://github.com/jpbarrette/moman/archive/497c90e34e412b6494db6dabf0d95db8034bd325.zip"
+ src momanSource
dest momanZip
onlyIfModified true
@@ -36,13 +39,16 @@ configure(project(":lucene:core")) {
}
task utilGenPacked(dependsOn: installMoman) {
- description "Regenerate util/PackedBulkOperationsPacked*.java and Packed64SingleBlock.java"
- group "generation"
+ def targetDir = file("src/java/org/apache/lucene/util/packed")
- doLast {
- def targetDir = file("src/java/org/apache/lucene/util/packed")
+ inputs.property("source", momanSource)
+ outputs.files fileTree(dir: targetDir, includes: ["Packed64SingleBlock.java", "BulkOperation*.java"])
- ['gen_BulkOperation.py', 'gen_Packed64SingleBlock.py'].each { prog ->
+ doLast {
+ [
+ file("${resources}/gen_BulkOperation.py"),
+ file("${resources}/gen_Packed64SingleBlock.py")
+ ].each { prog ->
logger.lifecycle("Executing: ${prog} in ${targetDir}")
quietExec {
workingDir targetDir
@@ -61,18 +67,22 @@ configure(project(":lucene:core")) {
}
task utilGenLev(dependsOn: installMoman) {
- description "Regenerate util/automaton Lev*ParametricDescription.java"
- group "generation"
+ def targetDir = file("src/java/org/apache/lucene/util/automaton")
- doLast {
- def targetDir = file("src/java/org/apache/lucene/util/automaton")
+ inputs.property("source", momanSource)
+ outputs.files fileTree(dir: targetDir, includes: ["*ParametricDescription.java"])
+ doLast {
['1', '2'].each { num ->
['True', 'False'].each { transpose ->
quietExec {
workingDir targetDir
executable project.externalTool("python3")
- args = ['-B', 'createLevAutomata.py', num, transpose, "${momanDir}/finenight/python"]
+ args = ['-B',
+ file("${resources}/createLevAutomata.py").toString(),
+ num,
+ transpose,
+ "${momanDir}/finenight/python"]
}
}
}
@@ -89,8 +99,9 @@ configure(project(":lucene:core")) {
description "Regenerate Moman-based sources."
group "generation"
- dependsOn utilGenLev, utilGenPacked
+ dependsOn wrapWithPersistentChecksums(utilGenPacked, "spotlessApply")
+ dependsOn wrapWithPersistentChecksums(utilGenLev, "spotlessApply")
}
- regenerate.dependsOn moman, "tidy"
+ regenerate.dependsOn moman
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py b/gradle/generation/moman/createLevAutomata.py
similarity index 100%
rename from lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py
rename to gradle/generation/moman/createLevAutomata.py
diff --git a/gradle/generation/moman/gen_BulkOperation.py b/gradle/generation/moman/gen_BulkOperation.py
new file mode 100644
index 0000000..ddb79cb
--- /dev/null
+++ b/gradle/generation/moman/gen_BulkOperation.py
@@ -0,0 +1,337 @@
+#! /usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+try:
+ # python 3.9+
+ from math import gcd
+except ImportError:
+ # old python
+ from fractions import gcd
+
+"""Code generation for bulk operations"""
+
+MAX_SPECIALIZED_BITS_PER_VALUE = 24;
+PACKED_64_SINGLE_BLOCK_BPV = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
+OUTPUT_FILE = "BulkOperation.java"
+HEADER = """// This file has been automatically generated, DO NOT EDIT
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.packed;
+
+"""
+
+FOOTER = """
+ protected int writeLong(long block, byte[] blocks, int blocksOffset) {
+ for (int j = 1; j <= 8; ++j) {
+ blocks[blocksOffset++] = (byte) (block >>> (64 - (j << 3)));
+ }
+ return blocksOffset;
+ }
+
+ /**
+ * For every number of bits per value, there is a minimum number of
+ * blocks (b) / values (v) you need to write in order to reach the next block
+ * boundary:
+ * <pre>
+ * - 16 bits per value -> b=2, v=1
+ * - 24 bits per value -> b=3, v=1
+ * - 50 bits per value -> b=25, v=4
+ * - 63 bits per value -> b=63, v=8
+ * - ...
+ * </pre>
+ *
+ * A bulk read consists in copying <code>iterations*v</code> values that are
+ * contained in <code>iterations*b</code> blocks into a <code>long[]</code>
+ * (higher values of <code>iterations</code> are likely to yield a better
+ * throughput): this requires n * (b + 8v) bytes of memory.
+ *
+ * This method computes <code>iterations</code> as
+ * <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
+ */
+ public final int computeIterations(int valueCount, int ramBudget) {
+ final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
+ if (iterations == 0) {
+ // at least 1
+ return 1;
+ } else if ((iterations - 1) * byteValueCount() >= valueCount) {
+ // don't allocate for more than the size of the reader
+ return (int) Math.ceil((double) valueCount / byteValueCount());
+ } else {
+ return iterations;
+ }
+ }
+}
+"""
+
+def is_power_of_two(n):
+ return n & (n - 1) == 0
+
+def casts(typ):
+ cast_start = "(%s) (" % typ
+ cast_end = ")"
+ if typ == "long":
+ cast_start = ""
+ cast_end = ""
+ return cast_start, cast_end
+
+def hexNoLSuffix(n):
+ # On 32 bit Python values > (1 << 31)-1 will have L appended by hex function:
+ s = hex(n)
+ if s.endswith('L'):
+ s = s[:-1]
+ return s
+
+def masks(bits):
+ if bits == 64:
+ return "", ""
+ return "(", " & %sL)" % (hexNoLSuffix((1 << bits) - 1))
+
+def get_type(bits):
+ if bits == 8:
+ return "byte"
+ elif bits == 16:
+ return "short"
+ elif bits == 32:
+ return "int"
+ elif bits == 64:
+ return "long"
+ else:
+ assert False
+
+def block_value_count(bpv, bits=64):
+ blocks = bpv
+ values = blocks * bits // bpv
+ while blocks % 2 == 0 and values % 2 == 0:
+ blocks //= 2
+ values //= 2
+ assert values * bpv == bits * blocks, "%d values, %d blocks, %d bits per value" % (values, blocks, bpv)
+ return (blocks, values)
+
+def packed64(bpv, f):
+ mask = (1 << bpv) - 1
+
+ f.write("\n")
+ f.write(" public BulkOperationPacked%d() {\n" % bpv)
+ f.write(" super(%d);\n" % bpv)
+ f.write(" }\n\n")
+
+ if bpv == 64:
+ f.write(""" @Override
+ public void decode(long[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
+ System.arraycopy(blocks, blocksOffset, values, valuesOffset, valueCount() * iterations);
+ }
+
+ @Override
+ public void decode(long[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
+ LongBuffer.wrap(values, valuesOffset, iterations * valueCount()).put(ByteBuffer.wrap(blocks, blocksOffset, 8 * iterations * blockCount()).asLongBuffer());
+ }
+""")
+ else:
+ p64_decode(bpv, f, 32)
+ p64_decode(bpv, f, 64)
+
+def p64_decode(bpv, f, bits):
+ blocks, values = block_value_count(bpv)
+ typ = get_type(bits)
+ cast_start, cast_end = casts(typ)
+
+ f.write(" @Override\n")
+ f.write(" public void decode(long[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" % typ)
+ if bits < bpv:
+ f.write(" throw new UnsupportedOperationException();\n")
+ else:
+ f.write(" for (int i = 0; i < iterations; ++i) {\n")
+ mask = (1 << bpv) - 1
+
+ if is_power_of_two(bpv):
+ f.write(" final long block = blocks[blocksOffset++];\n")
+ f.write(" for (int shift = %d; shift >= 0; shift -= %d) {\n" % (64 - bpv, bpv))
+ f.write(" values[valuesOffset++] = %s(block >>> shift) & %d%s;\n" % (cast_start, mask, cast_end))
+ f.write(" }\n")
+ else:
+ for i in range(0, values):
+ block_offset = i * bpv // 64
+ bit_offset = (i * bpv) % 64
+ if bit_offset == 0:
+ # start of block
+ f.write(" final long block%d = blocks[blocksOffset++];\n" % block_offset);
+ f.write(" values[valuesOffset++] = %sblock%d >>> %d%s;\n" % (cast_start, block_offset, 64 - bpv, cast_end))
+ elif bit_offset + bpv == 64:
+ # end of block
+ f.write(" values[valuesOffset++] = %sblock%d & %dL%s;\n" % (cast_start, block_offset, mask, cast_end))
+ elif bit_offset + bpv < 64:
+ # middle of block
+ f.write(" values[valuesOffset++] = %s(block%d >>> %d) & %dL%s;\n" % (cast_start, block_offset, 64 - bit_offset - bpv, mask, cast_end))
+ else:
+ # value spans across 2 blocks
+ mask1 = (1 << (64 - bit_offset)) - 1
+ shift1 = bit_offset + bpv - 64
+ shift2 = 64 - shift1
+ f.write(" final long block%d = blocks[blocksOffset++];\n" % (block_offset + 1));
+ f.write(" values[valuesOffset++] = %s((block%d & %dL) << %d) | (block%d >>> %d)%s;\n" % (cast_start, block_offset, mask1, shift1, block_offset + 1, shift2, cast_end))
+ f.write(" }\n")
+ f.write(" }\n\n")
+
+ byte_blocks, byte_values = block_value_count(bpv, 8)
+
+ f.write(" @Override\n")
+ f.write(" public void decode(byte[] blocks, int blocksOffset, %s[] values, int valuesOffset, int iterations) {\n" % typ)
+ if bits < bpv:
+ f.write(" throw new UnsupportedOperationException();\n")
+ else:
+ if is_power_of_two(bpv) and bpv < 8:
+ f.write(" for (int j = 0; j < iterations; ++j) {\n")
+ f.write(" final byte block = blocks[blocksOffset++];\n")
+ for shift in range(8 - bpv, 0, -bpv):
+ f.write(" values[valuesOffset++] = (block >>> %d) & %d;\n" % (shift, mask))
+ f.write(" values[valuesOffset++] = block & %d;\n" % mask)
+ f.write(" }\n")
+ elif bpv == 8:
+ f.write(" for (int j = 0; j < iterations; ++j) {\n")
+ f.write(" values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n")
+ f.write(" }\n")
+ elif is_power_of_two(bpv) and bpv > 8:
+ f.write(" for (int j = 0; j < iterations; ++j) {\n")
+ m = bits <= 32 and "0xFF" or "0xFFL"
+ f.write(" values[valuesOffset++] =")
+ for i in range(bpv // 8 - 1):
+ f.write(" ((blocks[blocksOffset++] & %s) << %d) |" % (m, bpv - 8))
+ f.write(" (blocks[blocksOffset++] & %s);\n" % m)
+ f.write(" }\n")
+ else:
+ f.write(" for (int i = 0; i < iterations; ++i) {\n")
+ for i in range(0, byte_values):
+ byte_start = i * bpv // 8
+ bit_start = (i * bpv) % 8
+ byte_end = ((i + 1) * bpv - 1) // 8
+ bit_end = ((i + 1) * bpv - 1) % 8
+ shift = lambda b: 8 * (byte_end - b - 1) + 1 + bit_end
+ if bit_start == 0:
+ f.write(" final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" % (typ, byte_start))
+ for b in range(byte_start + 1, byte_end + 1):
+ f.write(" final %s byte%d = blocks[blocksOffset++] & 0xFF;\n" % (typ, b))
+ f.write(" values[valuesOffset++] =")
+ if byte_start == byte_end:
+ if bit_start == 0:
+ if bit_end == 7:
+ f.write(" byte%d" % byte_start)
+ else:
+ f.write(" byte%d >>> %d" % (byte_start, 7 - bit_end))
+ else:
+ if bit_end == 7:
+ f.write(" byte%d & %d" % (byte_start, 2 ** (8 - bit_start) - 1))
+ else:
+ f.write(" (byte%d >>> %d) & %d" % (byte_start, 7 - bit_end, 2 ** (bit_end - bit_start + 1) - 1))
+ else:
+ if bit_start == 0:
+ f.write(" (byte%d << %d)" % (byte_start, shift(byte_start)))
+ else:
+ f.write(" ((byte%d & %d) << %d)" % (byte_start, 2 ** (8 - bit_start) - 1, shift(byte_start)))
+ for b in range(byte_start + 1, byte_end):
+ f.write(" | (byte%d << %d)" % (b, shift(b)))
+ if bit_end == 7:
+ f.write(" | byte%d" % byte_end)
+ else:
+ f.write(" | (byte%d >>> %d)" % (byte_end, 7 - bit_end))
+ f.write(";\n")
+ f.write(" }\n")
+ f.write(" }\n\n")
+
+if __name__ == '__main__':
+ f = open(OUTPUT_FILE, 'w')
+ f.write(HEADER)
+ f.write('\n')
+ f.write('''/**
+ * Efficient sequential read/write of packed integers.
+ */\n''')
+
+ f.write('abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {\n')
+ f.write(' private static final BulkOperation[] packedBulkOps = new BulkOperation[] {\n')
+
+ for bpv in range(1, 65):
+ if bpv > MAX_SPECIALIZED_BITS_PER_VALUE:
+ f.write(' new BulkOperationPacked(%d),\n' % bpv)
+ continue
+ f2 = open('BulkOperationPacked%d.java' % bpv, 'w')
+ f2.write(HEADER)
+ if bpv == 64:
+ f2.write('import java.nio.LongBuffer;\n')
+ f2.write('import java.nio.ByteBuffer;\n')
+ f2.write('\n')
+ f2.write('''/**
+ * Efficient sequential read/write of packed integers.
+ */\n''')
+ f2.write('final class BulkOperationPacked%d extends BulkOperationPacked {\n' % bpv)
+ packed64(bpv, f2)
+ f2.write('}\n')
+ f2.close()
+ f.write(' new BulkOperationPacked%d(),\n' % bpv)
+
+ f.write(' };\n')
+ f.write('\n')
+
+ f.write(' // NOTE: this is sparse (some entries are null):\n')
+ f.write(' private static final BulkOperation[] packedSingleBlockBulkOps = new BulkOperation[] {\n')
+ for bpv in range(1, max(PACKED_64_SINGLE_BLOCK_BPV) + 1):
+ if bpv in PACKED_64_SINGLE_BLOCK_BPV:
+ f.write(' new BulkOperationPackedSingleBlock(%d),\n' % bpv)
+ else:
+ f.write(' null,\n')
+ f.write(' };\n')
+ f.write('\n')
+
+ f.write("\n")
+ f.write(" public static BulkOperation of(PackedInts.Format format, int bitsPerValue) {\n")
+ f.write(" switch (format) {\n")
+
+ f.write(" case PACKED:\n")
+ f.write(" assert packedBulkOps[bitsPerValue - 1] != null;\n")
+ f.write(" return packedBulkOps[bitsPerValue - 1];\n")
+ f.write(" case PACKED_SINGLE_BLOCK:\n")
+ f.write(" assert packedSingleBlockBulkOps[bitsPerValue - 1] != null;\n")
+ f.write(" return packedSingleBlockBulkOps[bitsPerValue - 1];\n")
+ f.write(" default:\n")
+ f.write(" throw new AssertionError();\n")
+ f.write(" }\n")
+ f.write(" }\n")
+ f.write(FOOTER)
+ f.close()
diff --git a/gradle/generation/moman/gen_Packed64SingleBlock.py b/gradle/generation/moman/gen_Packed64SingleBlock.py
new file mode 100644
index 0000000..65f38fd
--- /dev/null
+++ b/gradle/generation/moman/gen_Packed64SingleBlock.py
@@ -0,0 +1,298 @@
+#! /usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+SUPPORTED_BITS_PER_VALUE = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32]
+
+HEADER = """// This file has been automatically generated, DO NOT EDIT
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.lucene.util.packed;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * This class is similar to {@link Packed64} except that it trades space for
+ * speed by ensuring that a single block needs to be read/written in order to
+ * read/write a value.
+ */
+abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
+
+ public static final int MAX_SUPPORTED_BITS_PER_VALUE = %d;
+ private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] {%s};
+
+ public static boolean isSupported(int bitsPerValue) {
+ return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
+ }
+
+ private static int requiredCapacity(int valueCount, int valuesPerBlock) {
+ return valueCount / valuesPerBlock
+ + (valueCount %% valuesPerBlock == 0 ? 0 : 1);
+ }
+
+ final long[] blocks;
+
+ Packed64SingleBlock(int valueCount, int bitsPerValue) {
+ super(valueCount, bitsPerValue);
+ assert isSupported(bitsPerValue);
+ final int valuesPerBlock = 64 / bitsPerValue;
+ blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
+ }
+
+ @Override
+ public void clear() {
+ Arrays.fill(blocks, 0L);
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return RamUsageEstimator.alignObjectSize(
+ RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
+ + RamUsageEstimator.sizeOf(blocks);
+ }
+
+ @Override
+ public int get(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ len = Math.min(len, valueCount - index);
+ assert off + len <= arr.length;
+
+ final int originalIndex = index;
+
+ // go to the next block boundary
+ final int valuesPerBlock = 64 / bitsPerValue;
+ final int offsetInBlock = index %% valuesPerBlock;
+ if (offsetInBlock != 0) {
+ for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
+ arr[off++] = get(index++);
+ --len;
+ }
+ if (len == 0) {
+ return index - originalIndex;
+ }
+ }
+
+ // bulk get
+ assert index %% valuesPerBlock == 0;
+ @SuppressWarnings("deprecation")
+ final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
+ assert decoder.longBlockCount() == 1;
+ assert decoder.longValueCount() == valuesPerBlock;
+ final int blockIndex = index / valuesPerBlock;
+ final int nblocks = (index + len) / valuesPerBlock - blockIndex;
+ decoder.decode(blocks, blockIndex, arr, off, nblocks);
+ final int diff = nblocks * valuesPerBlock;
+ index += diff; len -= diff;
+
+ if (index > originalIndex) {
+ // stay at the block boundary
+ return index - originalIndex;
+ } else {
+ // no progress so far => already at a block boundary but no full block to
+ // get
+ assert index == originalIndex;
+ return super.get(index, arr, off, len);
+ }
+ }
+
+ @Override
+ public int set(int index, long[] arr, int off, int len) {
+ assert len > 0 : "len must be > 0 (got " + len + ")";
+ assert index >= 0 && index < valueCount;
+ len = Math.min(len, valueCount - index);
+ assert off + len <= arr.length;
+
+ final int originalIndex = index;
+
+ // go to the next block boundary
+ final int valuesPerBlock = 64 / bitsPerValue;
+ final int offsetInBlock = index %% valuesPerBlock;
+ if (offsetInBlock != 0) {
+ for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
+ set(index++, arr[off++]);
+ --len;
+ }
+ if (len == 0) {
+ return index - originalIndex;
+ }
+ }
+
+ // bulk set
+ assert index %% valuesPerBlock == 0;
+ @SuppressWarnings("deprecation")
+ final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
+ assert op.longBlockCount() == 1;
+ assert op.longValueCount() == valuesPerBlock;
+ final int blockIndex = index / valuesPerBlock;
+ final int nblocks = (index + len) / valuesPerBlock - blockIndex;
+ op.encode(arr, off, blocks, blockIndex, nblocks);
+ final int diff = nblocks * valuesPerBlock;
+ index += diff; len -= diff;
+
+ if (index > originalIndex) {
+ // stay at the block boundary
+ return index - originalIndex;
+ } else {
+ // no progress so far => already at a block boundary but no full block to
+ // set
+ assert index == originalIndex;
+ return super.set(index, arr, off, len);
+ }
+ }
+
+ @Override
+ public void fill(int fromIndex, int toIndex, long val) {
+ assert fromIndex >= 0;
+ assert fromIndex <= toIndex;
+ assert PackedInts.unsignedBitsRequired(val) <= bitsPerValue;
+
+ final int valuesPerBlock = 64 / bitsPerValue;
+ if (toIndex - fromIndex <= valuesPerBlock << 1) {
+ // there needs to be at least one full block to set for the block
+ // approach to be worth trying
+ super.fill(fromIndex, toIndex, val);
+ return;
+ }
+
+ // set values naively until the next block start
+ int fromOffsetInBlock = fromIndex %% valuesPerBlock;
+ if (fromOffsetInBlock != 0) {
+ for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i) {
+ set(fromIndex++, val);
+ }
+ assert fromIndex %% valuesPerBlock == 0;
+ }
+
+ // bulk set of the inner blocks
+ final int fromBlock = fromIndex / valuesPerBlock;
+ final int toBlock = toIndex / valuesPerBlock;
+ assert fromBlock * valuesPerBlock == fromIndex;
+
+ long blockValue = 0L;
+ for (int i = 0; i < valuesPerBlock; ++i) {
+ blockValue = blockValue | (val << (i * bitsPerValue));
+ }
+ Arrays.fill(blocks, fromBlock, toBlock, blockValue);
+
+ // fill the gap
+ for (int i = valuesPerBlock * toBlock; i < toIndex; ++i) {
+ set(i, val);
+ }
+ }
+
+ @Override
+ @SuppressWarnings("deprecation")
+ protected PackedInts.Format getFormat() {
+ return PackedInts.Format.PACKED_SINGLE_BLOCK;
+ }
+
+ @Override
+ public String toString() {
+ return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue
+ + ",size=" + size() + ",blocks=" + blocks.length + ")";
+ }
+
+ public static Packed64SingleBlock create(DataInput in,
+ int valueCount, int bitsPerValue) throws IOException {
+ Packed64SingleBlock reader = create(valueCount, bitsPerValue);
+ for (int i = 0; i < reader.blocks.length; ++i) {
+ reader.blocks[i] = in.readLong();
+ }
+ return reader;
+ }
+
+""" % (SUPPORTED_BITS_PER_VALUE[-1], ", ".join(map(str, SUPPORTED_BITS_PER_VALUE)))
+
+FOOTER = "}"
+
+if __name__ == '__main__':
+
+ f = open("Packed64SingleBlock.java", 'w')
+ f.write(HEADER)
+ f.write(" public static Packed64SingleBlock create(int valueCount, int bitsPerValue) {\n")
+ f.write(" switch (bitsPerValue) {\n")
+ for bpv in SUPPORTED_BITS_PER_VALUE:
+ f.write(" case %d:\n" % bpv)
+ f.write(" return new Packed64SingleBlock%d(valueCount);\n" % bpv)
+ f.write(" default:\n")
+ f.write(" throw new IllegalArgumentException(\"Unsupported number of bits per value: \" + %d);\n" % bpv)
+ f.write(" }\n")
+ f.write(" }\n\n")
+
+ for bpv in SUPPORTED_BITS_PER_VALUE:
+ log_2 = 0
+ while (1 << log_2) < bpv:
+ log_2 = log_2 + 1
+ if (1 << log_2) != bpv:
+ log_2 = None
+
+ f.write(" static class Packed64SingleBlock%d extends Packed64SingleBlock {\n\n" % bpv)
+
+ f.write(" Packed64SingleBlock%d(int valueCount) {\n" % bpv)
+ f.write(" super(valueCount, %d);\n" % bpv)
+ f.write(" }\n\n")
+
+ f.write(" @Override\n")
+ f.write(" public long get(int index) {\n")
+ if log_2 is not None:
+ f.write(" final int o = index >>> %d;\n" % (6 - log_2))
+ f.write(" final int b = index & %d;\n" % ((1 << (6 - log_2)) - 1))
+ f.write(" final int shift = b << %d;\n" % log_2)
+ else:
+ f.write(" final int o = index / %d;\n" % (64 / bpv))
+ f.write(" final int b = index %% %d;\n" % (64 / bpv))
+ f.write(" final int shift = b * %d;\n" % bpv)
+ f.write(" return (blocks[o] >>> shift) & %dL;\n" % ((1 << bpv) - 1))
+ f.write(" }\n\n")
+
+ f.write(" @Override\n")
+ f.write(" public void set(int index, long value) {\n")
+ if log_2 is not None:
+ f.write(" final int o = index >>> %d;\n" % (6 - log_2))
+ f.write(" final int b = index & %d;\n" % ((1 << (6 - log_2)) - 1))
+ f.write(" final int shift = b << %d;\n" % log_2)
+ else:
+ f.write(" final int o = index / %d;\n" % (64 / bpv))
+ f.write(" final int b = index %% %d;\n" % (64 / bpv))
+ f.write(" final int shift = b * %d;\n" % bpv)
+ f.write(" blocks[o] = (blocks[o] & ~(%dL << shift)) | (value << shift);\n" % ((1 << bpv) - 1))
+ f.write(" }\n\n")
+ f.write(" }\n\n")
+
+ f.write(FOOTER)
+ f.close()
diff --git a/gradle/generation/regenerate.gradle b/gradle/generation/regenerate.gradle
index 8bbd367..920c40c 100644
--- a/gradle/generation/regenerate.gradle
+++ b/gradle/generation/regenerate.gradle
@@ -1,3 +1,7 @@
+import groovy.json.JsonOutput
+import groovy.json.JsonSlurper
+import org.apache.commons.codec.digest.DigestUtils
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -30,29 +34,145 @@ configure([
group "generation"
}
- // Make sure 'tidy' and its dependencies run after any other task in the dependencies
- // of 'regenerate'. This ensures proper execution ordering so that tidy tasks run
- // after whatever has been generated is complete.
- afterEvaluate {
- Set<Task> deps = regenerate.getTaskDependencies().getDependencies(regenerate)
- def tidy = deps.find { it.name == "tidy" }
- if (tidy) {
- TaskDependency dep = tidy.taskDependencies
- Set<Task> visited = new HashSet<>()
- Queue<Task> queue = new ArrayDeque<>()
- queue.add(tidy)
- while (!queue.isEmpty()) {
- Task t = queue.removeFirst()
- if (visited.add(t)) {
- queue.addAll(dep.getDependencies(t))
+ project.ext {
+ // This utility method implements the logic required for "persistent" incremental
+ // source-generating tasks. The idea is simple; the implementation is quite complex.
+ //
+ // The idea is that, given source-generating task "sourceTask" we create
+ // a bunch of other tasks that perform checksum generation, validation and sourceTask
+ // skipping; example:
+ //
+ // ${sourceTask}ChecksumLoad
+ // ${sourceTask}ChecksumSave
+ // ${sourceTask}ChecksumCheck (fails if checksums are inconsistent)
+ // maybe${sourceTask} dependsOn [checksum-load, sourceTask, checksum-save]
+ //
+ // Checksums are persisted and computed from sourceTask's inputs/outputs. If the
+ // persisted checksums are identical to current checksums, sourceTask
+ // is skipped (via sourceTask.onlyIf { false }).
+ //
+ // Implementation-wise things get complicated because gradle doesn't have the notion
+ // of "ordered" task execution with respect to task AND its dependencies (we can add
+ // constraints to each node in the execution graph but not node-and-dependencies).
+ //
+ // sourceTask - the task to wrap
+ // otherTasks - other tasks that should be scheduled to run after source task and
+ // before checksum calculation.
+ wrapWithPersistentChecksums = { Task sourceTask, Object... otherTasks ->
+ // Create checksum-loader task.
+ Task checksumLoadTask = tasks.create("${sourceTask.name}ChecksumLoad", {
+ ext {
+ checksumMatch = true
+ }
+
+ doFirst {
+ // Collect all of the task's inputs and outputs.
+ FileCollection allFiles = sourceTask.inputs.files + sourceTask.outputs.files
+ ext.allFiles = allFiles
+
+ // Compute checksums for root-project relative paths
+ Map<String, String> actualChecksums = allFiles.files.collectEntries { file ->
+ [
+ sourceTask.project.rootDir.relativePath(file),
+ file.exists() ? new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim() : "--"
+ ]
+ }
+ ext.actualChecksums = actualChecksums
+
+ // Load any previously written checksums
+ ext.checksumsFile = project.file("src/generated/checksums/${sourceTask.name}.json")
+ Map<String, String> savedChecksums = [:]
+ if (checksumsFile.exists()) {
+ savedChecksums = new JsonSlurper().parse(checksumsFile) as Map
+ }
+ ext.savedChecksums = savedChecksums
+
+ ext.checksumMatch = (savedChecksums.equals(actualChecksums))
+ }
+ })
+
+ Task checksumCheckTask = tasks.create("${sourceTask.name}ChecksumCheck", {
+ dependsOn checksumLoadTask
+
+ doFirst {
+ if (!checksumLoadTask.checksumMatch) {
+ // This can be made prettier but leave it verbose for now:
+ Map<String, String> actual = checksumLoadTask.actualChecksums
+ Map<String, String> expected = checksumLoadTask.savedChecksums
+
+ def same = actual.intersect(expected)
+ actual = actual - same
+ expected = expected - same
+
+ throw new GradleException("Checksums mismatch for derived resources; you might have" +
+ " modified a generated source file?:\n" +
+ "Actual:\n ${actual.entrySet().join('\n ')}\n\n" +
+ "Expected:\n ${expected.entrySet().join('\n ')}"
+ )
+ }
+ }
+ })
+ check.dependsOn checksumCheckTask
+
+ Task checksumSaveTask = tasks.create("${sourceTask.name}ChecksumSave", {
+ dependsOn checksumLoadTask
+
+ doFirst {
+ File checksumsFile = checksumLoadTask.ext.checksumsFile
+ checksumsFile.parentFile.mkdirs()
+
+ // Recompute checksums for root-project relative paths
+ Map<String, String> actualChecksums = checksumLoadTask.ext.allFiles.files.collectEntries { file ->
+ [
+ sourceTask.project.rootDir.relativePath(file),
+ new DigestUtils(DigestUtils.sha1Digest).digestAsHex(file).trim()
+ ]
+ }
+
+ checksumsFile.setText(
+ JsonOutput.prettyPrint(JsonOutput.toJson(actualChecksums)), "UTF-8")
+
+ logger.warn("Updated generated file checksums for task ${sourceTask.path}.")
+ }
+ })
+
+ Task conditionalTask = tasks.create("${sourceTask.name}IfChanged", {
+ def deps = [
+ checksumLoadTask,
+ sourceTask,
+ *otherTasks,
+ checksumSaveTask
+ ].flatten()
+
+ dependsOn deps
+ mustRunInOrder deps
+
+ doFirst {
+ if (checksumLoadTask.checksumMatch) {
+ logger.lifecycle("Checksums consistent with sources, skipping task: ${sourceTask.path}")
+ }
+ }
+ })
+
+ // Copy the description and group from the source task.
+ project.afterEvaluate {
+ conditionalTask.group sourceTask.group
+ conditionalTask.description sourceTask.description + " (if sources changed)"
+ }
+
+ // Set conditional execution only if checksum mismatch occurred.
+ if (!gradle.startParameter.isRerunTasks()) {
+ project.afterEvaluate {
+ resolveTaskRefs([sourceTask, checksumSaveTask]).each { t ->
+ t.configure {
+ logger.info("Making " + t.name + " run only if " + checksumLoadTask.name + " indicates changes")
+ onlyIf { !checksumLoadTask.checksumMatch }
+ }
+ }
}
}
-
- def intersection = visited.intersect(deps)
- def tidyDeps = visited - intersection + [tidy]
- def genDeps = deps - intersection
- tidyDeps.each { Task t -> t.mustRunAfter(genDeps) }
+ return conditionalTask
}
}
}
\ No newline at end of file
diff --git a/gradle/generation/snowball.gradle b/gradle/generation/snowball.gradle
index 7ea4556..4576367 100644
--- a/gradle/generation/snowball.gradle
+++ b/gradle/generation/snowball.gradle
@@ -114,5 +114,5 @@ configure(project(":lucene:analysis:common")) {
}
}
- regenerate.dependsOn snowball, "tidy"
+ regenerate.dependsOn mustRunInOrder([snowball, "spotlessApply"])
}
diff --git a/gradle/globals.gradle b/gradle/globals.gradle
index 66b1660..4f4ca7d 100644
--- a/gradle/globals.gradle
+++ b/gradle/globals.gradle
@@ -113,5 +113,35 @@ allprojects {
return result
}
+
+ // Convert a list of strings, tasks and task providers into resolved tasks or task providers.
+ resolveTaskRefs = { List<Object> refs ->
+ def resolved = refs.collect {
+ if (it instanceof Task) return it
+ if (it instanceof TaskProvider) return it
+ if (it instanceof String) return project.tasks.named((String) it)
+ throw new GradleException("Can't resolve task: ${it}")
+ }
+ return resolved
+ }
+
+ // Forces sequential ordering of a list of tasks (via mustRunAfter).
+ // This method should not be required in 99% of cases; consider regular dependsOn links.
+ // This method does NOT imply any ordering between dependencies of task on the input
+ // list - the execution of these may still be unordered.
+ mustRunInOrder = { List<Object> taskList ->
+ project.afterEvaluate {
+ def resolved = resolveTaskRefs(taskList)
+
+ // Enforce sequential ordering between tasks (this does NOT apply to their dependencies!)
+ for (int i = 1; i < resolved.size(); i++) {
+ resolved[i].configure {
+ logger.info("Scheduling " + resolved[i].name + " to run after " + resolved[i - 1].name)
+ mustRunAfter resolved[i - 1]
+ }
+ }
+ }
+ return taskList
+ }
}
}
diff --git a/gradle/publishing/distribution.gradle b/gradle/publishing/distribution.gradle
index b05b826..9483747 100644
--- a/gradle/publishing/distribution.gradle
+++ b/gradle/publishing/distribution.gradle
@@ -17,17 +17,6 @@
import org.apache.commons.codec.digest.DigestUtils
-// We're using commons-codec for computing checksums.
-buildscript {
- repositories {
- mavenCentral()
- }
-
- dependencies {
- classpath "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
- }
-}
-
allprojects {
plugins.withType(DistributionPlugin) {
def checksum = {
diff --git a/gradle/validation/jar-checks.gradle b/gradle/validation/jar-checks.gradle
index b7c8603..0b344d1 100644
--- a/gradle/validation/jar-checks.gradle
+++ b/gradle/validation/jar-checks.gradle
@@ -25,17 +25,6 @@ import org.apache.commons.codec.digest.DigestUtils
// This should be false only for debugging.
def failOnError = true
-// We're using commons-codec for computing checksums.
-buildscript {
- repositories {
- mavenCentral()
- }
-
- dependencies {
- classpath "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
- }
-}
-
// Configure license checksum folder for top-level projects.
// (The file("licenses") inside the configure scope resolves
// relative to the current project so they're not the same).
diff --git a/gradle/validation/precommit.gradle b/gradle/validation/precommit.gradle
index 4a04201..eeda128 100644
--- a/gradle/validation/precommit.gradle
+++ b/gradle/validation/precommit.gradle
@@ -26,11 +26,14 @@ configure(rootProject) {
dependsOn ":verifyLocks"
dependsOn ":versionsPropsAreSorted"
dependsOn ":checkWorkingCopyClean"
+ }
- // Attach all these tasks from all projects that have them.
- // This uses lazy collections as they may not yet be defined.
- dependsOn allprojects.collect { prj ->
- prj.tasks.matching { task -> task.name in [
+ // Attach all these tasks from all projects that have them.
+ // This uses lazy collections as they may not yet be defined.
+ allprojects { prj ->
+ precommit.dependsOn prj.tasks.matching { task ->
+ return task.name.endsWith("ChecksumCheck") ||
+ task.name in [
"forbiddenApis",
"licenses",
"javadoc",
@@ -39,7 +42,6 @@ configure(rootProject) {
"validateSourcePatterns",
"spotlessCheck"
]}
- }
}
// Each validation task should be attached to check but make sure
diff --git a/lucene/analysis/common/checksums.properties b/lucene/analysis/common/checksums.properties
new file mode 100644
index 0000000..ce06580
--- /dev/null
+++ b/lucene/analysis/common/checksums.properties
@@ -0,0 +1,2 @@
+
+checksum.jflexClassicTokenizerImpl=8c4eac5fd02be551e666783df5531afda23cbc96
\ No newline at end of file
diff --git a/lucene/analysis/common/src/generated/checksums/generateClassicTokenizer.json b/lucene/analysis/common/src/generated/checksums/generateClassicTokenizer.json
new file mode 100644
index 0000000..9f53446
--- /dev/null
+++ b/lucene/analysis/common/src/generated/checksums/generateClassicTokenizer.json
@@ -0,0 +1,5 @@
+{
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.jflex": "958b028ef3f0aec36488fb2bb033cdec5858035f",
+ "gradle/generation/jflex/skeleton.default.txt": "ca1043249c0eefdf2623a785e2b91f5608bfc3f1",
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/classic/ClassicTokenizerImpl.java": "21c2cf7ba0a0cdeb43ebe624101e259c9348f6b0"
+}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/generated/checksums/generateHTMLStripCharFilter.json b/lucene/analysis/common/src/generated/checksums/generateHTMLStripCharFilter.json
new file mode 100644
index 0000000..f9f6ea3
--- /dev/null
+++ b/lucene/analysis/common/src/generated/checksums/generateHTMLStripCharFilter.json
@@ -0,0 +1,5 @@
+{
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex": "71760e2f7abe078109545a0c68aeac9125508d7c",
+ "gradle/generation/jflex/skeleton.default.txt": "ca1043249c0eefdf2623a785e2b91f5608bfc3f1",
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java": "78f5208455706d60a9ce4b63624ed04b0fd32573"
+}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/generated/checksums/generateUAX29URLEmailTokenizer.json b/lucene/analysis/common/src/generated/checksums/generateUAX29URLEmailTokenizer.json
new file mode 100644
index 0000000..a022612
--- /dev/null
+++ b/lucene/analysis/common/src/generated/checksums/generateUAX29URLEmailTokenizer.json
@@ -0,0 +1,5 @@
+{
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.jflex": "472a33bcf5741bc8923aaa2717000d9ccd62f1e2",
+ "gradle/generation/jflex/skeleton.disable.buffer.expansion.txt": "68263ff0a014904c6e89b040d868d8f399408908",
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/email/UAX29URLEmailTokenizerImpl.java": "ebeb76148ee02612841c463819f86d8932c9a7c3"
+}
\ No newline at end of file
diff --git a/lucene/analysis/common/src/generated/checksums/generateWikipediaTokenizer.json b/lucene/analysis/common/src/generated/checksums/generateWikipediaTokenizer.json
new file mode 100644
index 0000000..2bc0334
--- /dev/null
+++ b/lucene/analysis/common/src/generated/checksums/generateWikipediaTokenizer.json
@@ -0,0 +1,5 @@
+{
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex": "a23a4b7cbcdba1fc864c0b85bc2784c8893a0f9f",
+ "gradle/generation/jflex/skeleton.default.txt": "ca1043249c0eefdf2623a785e2b91f5608bfc3f1",
+ "lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java": "10b391af6953d2f7bcca86da835a1037705509ec"
+}
\ No newline at end of file
diff --git a/lucene/core/src/generated/checksums/generateStandardTokenizer.json b/lucene/core/src/generated/checksums/generateStandardTokenizer.json
new file mode 100644
index 0000000..1497072
--- /dev/null
+++ b/lucene/core/src/generated/checksums/generateStandardTokenizer.json
@@ -0,0 +1,5 @@
+{
+ "lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex": "6158aeb8dd11cd9100623608b2dcce51b2df9d0b",
+ "gradle/generation/jflex/skeleton.disable.buffer.expansion.txt": "68263ff0a014904c6e89b040d868d8f399408908",
+ "lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java": "8e33c2698446c1c7a9479796a41316d1932ceda9"
+}
\ No newline at end of file
diff --git a/lucene/core/src/generated/checksums/utilGenLev.json b/lucene/core/src/generated/checksums/utilGenLev.json
new file mode 100644
index 0000000..3f72977
--- /dev/null
+++ b/lucene/core/src/generated/checksums/utilGenLev.json
@@ -0,0 +1,6 @@
+{
+ "lucene/core/src/java/org/apache/lucene/util/automaton/Lev1ParametricDescription.java": "8a07d087eba9db1bc228b9dbc4e3b9294dac8478",
+ "lucene/core/src/java/org/apache/lucene/util/automaton/Lev1TParametricDescription.java": "a328606a8933fe2f989bf3dbed84aa34fb4113ed",
+ "lucene/core/src/java/org/apache/lucene/util/automaton/Lev2ParametricDescription.java": "0d839846eb3cbe0ef62576ab33d63a97c28a8b45",
+ "lucene/core/src/java/org/apache/lucene/util/automaton/Lev2TParametricDescription.java": "7c29a828a20f084c4998179fd6a4ee9aa909c1ce"
+}
\ No newline at end of file
diff --git a/lucene/core/src/generated/checksums/utilGenPacked.json b/lucene/core/src/generated/checksums/utilGenPacked.json
new file mode 100644
index 0000000..da6f7ea
--- /dev/null
+++ b/lucene/core/src/generated/checksums/utilGenPacked.json
@@ -0,0 +1,30 @@
+{
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperation.java": "c4e16930960a18e74802c56ee60f8e83bd8b6dd2",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked.java": "2b0d9226bae8a07ce4970bcaa9d4d0cd4fe2c79a",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked1.java": "c483aa35c275bacc1f3a010c5b879441be502108",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked10.java": "35fe8a9c9a91cd840b239af4ddd1e0de53ef1404",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked11.java": "2192613a836dcb4051260cbc7fb6fb3e4c84ef61",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked12.java": "0ddb99688fb194eef272fb49ea71e4928b24721d",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked13.java": "434359f18b054c7e86a26792dbf6f4d649f7161d",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked14.java": "bdf6a39874e8a4a455e48471b0d914568c0842d8",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked15.java": "8f6f529b87df6aa45a3aff8797a35305679dd037",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked16.java": "3e2814f8925e58cbf5d6d380c6d9d7c5b5a529c9",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked17.java": "b7be72b55206f68bbc9d035c47f42baa31d04bbb",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked18.java": "99aa8c806dc92ddb1fdca115445480e81c65b094",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked19.java": "c380914c3bf8c07aec57829f3480c8c2ce3eb1e3",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked2.java": "ce738a120c6db57a5e5380e1f8c83c7e7ec0926f",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked20.java": "aa6fc8768fd72cea621d99e80bca75529dd8850c",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked21.java": "678b4b7b994ca6af7de4dccf63267913bd361da1",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked22.java": "c8cf4a7b633927969c488f79e975617d13914a09",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked23.java": "70c1c3b9f48379622cf0c6d5927932a463aa26b9",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked24.java": "d244c38c0c3978486aa28c2a39f1b3c79444cf8a",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked3.java": "c4741da6bba61e104d767363cb4f2cb45bc5c00b",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked4.java": "06a9241248cff07f8f1077d46c16cb0da50837f1",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked5.java": "1c89ed72f462b5d332e86b97ed961df4ed0598af",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked6.java": "5d0dd41c9b26056bc9ff595c4abc5f7158b5ad94",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked7.java": "f341a17920904b7f4ec4cda5d15802fd122a3345",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked8.java": "bc5124047b26fc0be147db5bc855be038d306f65",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPacked9.java": "1121f69ea6d830ab6f4bd2f51d017b792c17d1b1",
+ "lucene/core/src/java/org/apache/lucene/util/packed/BulkOperationPackedSingleBlock.java": "36984601502fcc812eb9d9a845fa10774e575653",
+ "lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java": "18c97614f29045519a8d440a35c685c50a5e9a34"
+}
\ No newline at end of file
diff --git a/lucene/queryparser/src/generated/checksums/javaccParserClassic.json b/lucene/queryparser/src/generated/checksums/javaccParserClassic.json
new file mode 100644
index 0000000..48707e9
--- /dev/null
+++ b/lucene/queryparser/src/generated/checksums/javaccParserClassic.json
@@ -0,0 +1,9 @@
+{
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj": "c9584bbe50c3c7479f72ea84145ebbf034a201ea",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java": "e0f1cced0f9448dea63b03931a5287e701b8b8cd",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserConstants.java": "e59a3fd38b66a3d56779c55955c1e014225a1f50",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java": "cba572aa235f3098383a26c369b5585c708647d8",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java": "0f421768d8a964a00a6566180fe26547ff2f3e1e",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java": "310665ba37d982327fcb55cc3523d629ef29ef54",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java": "9927caf69afb3a06bc679995cabb14f3b56e16c7"
+}
\ No newline at end of file
diff --git a/lucene/queryparser/src/generated/checksums/javaccParserFlexible.json b/lucene/queryparser/src/generated/checksums/javaccParserFlexible.json
new file mode 100644
index 0000000..b1d7035
--- /dev/null
+++ b/lucene/queryparser/src/generated/checksums/javaccParserFlexible.json
@@ -0,0 +1,9 @@
+{
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj": "08b62ed73607b1646af5dadb81c8bb34e381daee",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java": "75e9d84f424bb697f899fe3adacc0094bac00672",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java": "e73933bff38a62d90dab64f72a1a0deadfff246f",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java": "6e503b48ffa9f4648798e5394f7baeec366d1f07",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java": "3d5f272a6d56b3f4962b252267ce2662e734414e",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java": "f4cb9d01587279dba30e549ce4867e4381bbd9d7",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java": "cdfa99af5fcf6b1e50691a1c1370ba60bf0d2d2d"
+}
\ No newline at end of file
diff --git a/lucene/queryparser/src/generated/checksums/javaccParserSurround.json b/lucene/queryparser/src/generated/checksums/javaccParserSurround.json
new file mode 100644
index 0000000..edb0713
--- /dev/null
+++ b/lucene/queryparser/src/generated/checksums/javaccParserSurround.json
@@ -0,0 +1,9 @@
+{
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/QueryParser.jj": "21b38627431747c741e2ec24be1e7aef38dc70c9",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/QueryParser.java": "6e5f595be9084b24f0025ccd4b7b4cb11b0bf4b8",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/QueryParserConstants.java": "8feb77878890c27e874be457d839eba48192c40f",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/QueryParserTokenManager.java": "7b47f2a971aa94339831b8ab17e1cfe401add06c",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/ParseException.java": "37613d8e8557bd17e4af1b0b0279e75094f409fb",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/Token.java": "a5eea2a3043e0aa2781f4a43b9ab9c5d59add80e",
+ "lucene/queryparser/src/java/org/apache/lucene/queryparser/surround/parser/TokenMgrError.java": "77350c188e18ff8338088b1e14b6b34c9d0089eb"
+}
\ No newline at end of file