You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2020/02/17 16:13:51 UTC
[lucene-solr] branch jira/LUCENE-9220 updated: LUCENE-9220: simplify regeneration logic, handle if language gets deleted, etc
This is an automated email from the ASF dual-hosted git repository.
rmuir pushed a commit to branch jira/LUCENE-9220
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/jira/LUCENE-9220 by this push:
new 76ed7d2 LUCENE-9220: simplify regeneration logic, handle if language gets deleted, etc
76ed7d2 is described below
commit 76ed7d2cc3a4ed0681b262385d59127b2377a75f
Author: Robert Muir <rm...@apache.org>
AuthorDate: Mon Feb 17 11:13:44 2020 -0500
LUCENE-9220: simplify regeneration logic, handle if language gets deleted, etc
---
gradle/generation/snowball.sh | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/gradle/generation/snowball.sh b/gradle/generation/snowball.sh
index 6b267fe..27aaf36 100755
--- a/gradle/generation/snowball.sh
+++ b/gradle/generation/snowball.sh
@@ -72,29 +72,27 @@ done
# regenerate test data
rm -f ${TESTDSTDIR}/test_languages.txt
+rm -f ${TESTDSTDIR}/*.zip
for file in ${TESTSRCDIR}/*; do
- if [ -f "${file}/voc.txt" ] && [ -f "${file}/output.txt" ]; then
+ # look for input (voc.txt) and expected output (output.txt) without any special licenses (COPYING)
+ if [ -f "${file}/voc.txt" ] && [ -f "${file}/output.txt" ] && [ ! -f "${file}/COPYING" ]; then
language=$(basename ${file})
if [ "${language}" == "kraaij_pohlmann" ]; then
language="kp"
fi
- rm -f ${TESTDSTDIR}/${language}.zip
# make the .zip reproducible if data hasn't changed.
arbitrary_timestamp="200001010000"
# some test files are yuge, randomly sample up to this amount
row_limit="2000"
- # TODO: for now don't deal with any special licenses
- if [ ! -f "${file}/COPYING" ]; then
- tmpdir=$(mktemp -d)
- myrandom="openssl enc -aes-256-ctr -k ${arbitrary_timestamp} -nosalt -iv 0 -md md5"
- for data in "voc.txt" "output.txt"; do
- shuf -n ${row_limit} --random-source=<(${myrandom} < /dev/zero 2>/dev/null) ${file}/${data} > ${tmpdir}/${data} \
- && touch -t ${arbitrary_timestamp} ${tmpdir}/${data}
- done
- zip --quiet --junk-paths -X -9 ${TESTDSTDIR}/${language}.zip ${tmpdir}/voc.txt ${tmpdir}/output.txt
- echo "${language}" >> ${TESTDSTDIR}/test_languages.txt
- rm -r ${tmpdir}
- fi
+ tmpdir=$(mktemp -d)
+ myrandom="openssl enc -aes-256-ctr -k ${arbitrary_timestamp} -nosalt -iv 0 -md md5"
+ for data in "voc.txt" "output.txt"; do
+ shuf -n ${row_limit} --random-source=<(${myrandom} < /dev/zero 2>/dev/null) ${file}/${data} > ${tmpdir}/${data} \
+ && touch -t ${arbitrary_timestamp} ${tmpdir}/${data}
+ done
+ zip --quiet --junk-paths -X -9 ${TESTDSTDIR}/${language}.zip ${tmpdir}/voc.txt ${tmpdir}/output.txt
+ echo "${language}" >> ${TESTDSTDIR}/test_languages.txt
+ rm -r ${tmpdir}
fi
done