You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2020/02/17 16:13:51 UTC

[lucene-solr] branch jira/LUCENE-9220 updated: LUCENE-9220: simplify regeneration logic, handle if language gets deleted, etc

This is an automated email from the ASF dual-hosted git repository.

rmuir pushed a commit to branch jira/LUCENE-9220
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/LUCENE-9220 by this push:
     new 76ed7d2  LUCENE-9220: simplify regeneration logic, handle if language gets deleted, etc
76ed7d2 is described below

commit 76ed7d2cc3a4ed0681b262385d59127b2377a75f
Author: Robert Muir <rm...@apache.org>
AuthorDate: Mon Feb 17 11:13:44 2020 -0500

    LUCENE-9220: simplify regeneration logic, handle if language gets deleted, etc
---
 gradle/generation/snowball.sh | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/gradle/generation/snowball.sh b/gradle/generation/snowball.sh
index 6b267fe..27aaf36 100755
--- a/gradle/generation/snowball.sh
+++ b/gradle/generation/snowball.sh
@@ -72,29 +72,27 @@ done
 
 # regenerate test data
 rm -f ${TESTDSTDIR}/test_languages.txt
+rm -f ${TESTDSTDIR}/*.zip
 for file in ${TESTSRCDIR}/*; do
-  if [ -f "${file}/voc.txt" ] && [ -f "${file}/output.txt" ]; then
+  # look for input (voc.txt) and expected output (output.txt) without any special licenses (COPYING)
+  if [ -f "${file}/voc.txt" ] && [ -f "${file}/output.txt" ] && [ ! -f "${file}/COPYING" ]; then
     language=$(basename ${file})
     if [ "${language}" == "kraaij_pohlmann" ]; then
       language="kp"
     fi
-    rm -f ${TESTDSTDIR}/${language}.zip
     # make the .zip reproducible if data hasn't changed.
     arbitrary_timestamp="200001010000"
     # some test files are yuge, randomly sample up to this amount
     row_limit="2000"
-    # TODO: for now don't deal with any special licenses
-    if [ ! -f "${file}/COPYING" ]; then
-      tmpdir=$(mktemp -d)
-      myrandom="openssl enc -aes-256-ctr -k ${arbitrary_timestamp} -nosalt -iv 0 -md md5"
-      for data in "voc.txt" "output.txt"; do
-        shuf -n ${row_limit} --random-source=<(${myrandom} < /dev/zero 2>/dev/null) ${file}/${data} > ${tmpdir}/${data} \
-          && touch -t ${arbitrary_timestamp} ${tmpdir}/${data}
-      done
-      zip --quiet --junk-paths -X -9 ${TESTDSTDIR}/${language}.zip ${tmpdir}/voc.txt ${tmpdir}/output.txt
-      echo "${language}" >> ${TESTDSTDIR}/test_languages.txt
-      rm -r ${tmpdir}
-    fi
+    tmpdir=$(mktemp -d)
+    myrandom="openssl enc -aes-256-ctr -k ${arbitrary_timestamp} -nosalt -iv 0 -md md5"
+    for data in "voc.txt" "output.txt"; do
+      shuf -n ${row_limit} --random-source=<(${myrandom} < /dev/zero 2>/dev/null) ${file}/${data} > ${tmpdir}/${data} \
+        && touch -t ${arbitrary_timestamp} ${tmpdir}/${data}
+    done
+    zip --quiet --junk-paths -X -9 ${TESTDSTDIR}/${language}.zip ${tmpdir}/voc.txt ${tmpdir}/output.txt
+    echo "${language}" >> ${TESTDSTDIR}/test_languages.txt
+    rm -r ${tmpdir}
   fi
 done