You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2022/05/02 13:24:02 UTC
[lucene] branch branch_9x updated: LUCENE-10551: improve testing of LowercaseAsciiCompression (#858)
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 046991a4cf3 LUCENE-10551: improve testing of LowercaseAsciiCompression (#858)
046991a4cf3 is described below
commit 046991a4cf394e8dd4f263f64bb0a68eb8c7c130
Author: Michael McCandless <mi...@apache.org>
AuthorDate: Mon May 2 07:49:16 2022 -0500
LUCENE-10551: improve testing of LowercaseAsciiCompression (#858)
---
.../util/compress/LowercaseAsciiCompression.java | 11 +++---
.../compress/TestLowercaseAsciiCompression.java | 41 +++++++++++++++++-----
2 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java b/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java
index a70a89efc08..1f287c3b4fd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java
+++ b/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java
@@ -111,14 +111,13 @@ public final class LowercaseAsciiCompression {
numExceptions2++;
}
}
+
+ // TODO: shouldn't this really be an assert instead? but then this real "if" triggered
+ // LUCENE-10551 so maybe it should remain a real "if":
+
if (numExceptions != numExceptions2) {
throw new IllegalStateException(
- ""
- + numExceptions
- + " <> "
- + numExceptions2
- + " "
- + new BytesRef(in, 0, len).utf8ToString());
+ "" + numExceptions + " <> " + numExceptions2 + " " + new BytesRef(in, 0, len));
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java b/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java
index fd8374575e8..3c18d0334fb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java
@@ -17,6 +17,7 @@
package org.apache.lucene.util.compress;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.lucene.store.ByteBuffersDataOutput;
@@ -47,20 +48,35 @@ public class TestLowercaseAsciiCompression extends LuceneTestCase {
}
public void testSimple() throws Exception {
- assertFalse(doTestCompress("".getBytes("UTF-8"))); // too short
- assertFalse(doTestCompress("ab1".getBytes("UTF-8"))); // too short
- assertFalse(doTestCompress("ab1cdef".getBytes("UTF-8"))); // too short
- assertTrue(doTestCompress("ab1cdefg".getBytes("UTF-8")));
- assertFalse(doTestCompress("ab1cdEfg".getBytes("UTF-8"))); // too many exceptions
- assertTrue(doTestCompress("ab1cdefg".getBytes("UTF-8")));
+ assertFalse(doTestCompress("".getBytes(StandardCharsets.UTF_8))); // too short
+ assertFalse(doTestCompress("ab1".getBytes(StandardCharsets.UTF_8))); // too short
+ assertFalse(doTestCompress("ab1cdef".getBytes(StandardCharsets.UTF_8))); // too short
+ assertTrue(doTestCompress("ab1cdefg".getBytes(StandardCharsets.UTF_8)));
+ assertFalse(doTestCompress("ab1cdEfg".getBytes(StandardCharsets.UTF_8))); // too many exceptions
+ assertTrue(doTestCompress("ab1cdefg".getBytes(StandardCharsets.UTF_8)));
// 1 exception, but enough chars to be worth encoding an exception
- assertTrue(doTestCompress("ab1.dEfg427hiogchio:'nwm un!94twxz".getBytes("UTF-8")));
+ assertTrue(
+ doTestCompress("ab1.dEfg427hiogchio:'nwm un!94twxz".getBytes(StandardCharsets.UTF_8)));
+ }
+
+ // LUCENE-10551
+ public void testNotReallySimple() throws Exception {
+ doTestCompress(
+ "cion1cion_desarrollociones_oraclecionesnaturacionesnatura2tedppsa-integrationdemotiontion cloud gen2tion instance - dev1tion instance - testtion-devbtion-instancetion-prdtion-promerication-qation064533tion535217tion697401tion761348tion892818tion_matrationcauto_simmonsintgic_testtioncloudprodictioncloudservicetiongateway10tioninstance-jtsundatamartprd??o"
+ .getBytes(StandardCharsets.UTF_8));
+ }
+
+ // LUCENE-10551
+ public void testNotReallySimple2() throws Exception {
+ doTestCompress(
+ "analytics-platform-test/koala/cluster-tool:1.0-20220310151438.492,mesh_istio_examples-bookinfo-details-v1:1.16.2mesh_istio_examples-bookinfo-reviews-v3:1.16.2oce-clamav:1.0.219oce-tesseract:1.0.7oce-traefik:2.5.1oci-opensearch:1.2.4.8.103oda-digital-assistant-control-plane-train-pool-workflow-v6:22.02.14oke-coresvcs-k8s-dns-dnsmasq-nanny-amd64@sha256:41aa9160ceeaf712369ddb660d02e5ec06d1679965e6930351967c8cf5ed62d4oke-coresvcs-k8s-dns-kube-dns-amd64@sha256:2cf34b04106974952996c6e [...]
+ .getBytes(StandardCharsets.UTF_8));
}
public void testFarAwayExceptions() throws Exception {
String s =
"01W" + IntStream.range(0, 300).mapToObj(i -> "a").collect(Collectors.joining()) + "W.";
- assertTrue(doTestCompress(s.getBytes("UTF-8")));
+ assertTrue(doTestCompress(s.getBytes(StandardCharsets.UTF_8)));
}
public void testRandomAscii() throws IOException {
@@ -118,4 +134,13 @@ public class TestLowercaseAsciiCompression extends LuceneTestCase {
doTestCompress(bytes, len);
}
}
+
+ public void testAsciiCompressionRandom2() throws IOException {
+ int iters = atLeast(1000);
+ for (int iter = 0; iter < iters; ++iter) {
+ doTestCompress(
+ TestUtil.randomSubString(random(), atLeast(400), random().nextBoolean())
+ .getBytes(StandardCharsets.UTF_8));
+ }
+ }
}