You are viewing a plain text version of this content. The canonical link for it was a hyperlink that is not preserved in this plain text version.
Posted to commits@lucene.apache.org by mi...@apache.org on 2022/05/02 13:24:02 UTC

[lucene] branch branch_9x updated: LUCENE-10551: improve testing of LowercaseAsciiCompression (#858)

This is an automated email from the ASF dual-hosted git repository.

mikemccand pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 046991a4cf3 LUCENE-10551: improve testing of LowercaseAsciiCompression (#858)
046991a4cf3 is described below

commit 046991a4cf394e8dd4f263f64bb0a68eb8c7c130
Author: Michael McCandless <mi...@apache.org>
AuthorDate: Mon May 2 07:49:16 2022 -0500

    LUCENE-10551: improve testing of LowercaseAsciiCompression (#858)
---
 .../util/compress/LowercaseAsciiCompression.java   | 11 +++---
 .../compress/TestLowercaseAsciiCompression.java    | 41 +++++++++++++++++-----
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java b/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java
index a70a89efc08..1f287c3b4fd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java
+++ b/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java
@@ -111,14 +111,13 @@ public final class LowercaseAsciiCompression {
           numExceptions2++;
         }
       }
+
+      // TODO: shouldn't this really be an assert instead?  but then this real "if" triggered
+      // LUCENE-10551 so maybe it should remain a real "if":
+
       if (numExceptions != numExceptions2) {
         throw new IllegalStateException(
-            ""
-                + numExceptions
-                + " <> "
-                + numExceptions2
-                + " "
-                + new BytesRef(in, 0, len).utf8ToString());
+            "" + numExceptions + " <> " + numExceptions2 + " " + new BytesRef(in, 0, len));
       }
     }
 
diff --git a/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java b/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java
index fd8374575e8..3c18d0334fb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.util.compress;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import org.apache.lucene.store.ByteBuffersDataOutput;
@@ -47,20 +48,35 @@ public class TestLowercaseAsciiCompression extends LuceneTestCase {
   }
 
   public void testSimple() throws Exception {
-    assertFalse(doTestCompress("".getBytes("UTF-8"))); // too short
-    assertFalse(doTestCompress("ab1".getBytes("UTF-8"))); // too short
-    assertFalse(doTestCompress("ab1cdef".getBytes("UTF-8"))); // too short
-    assertTrue(doTestCompress("ab1cdefg".getBytes("UTF-8")));
-    assertFalse(doTestCompress("ab1cdEfg".getBytes("UTF-8"))); // too many exceptions
-    assertTrue(doTestCompress("ab1cdefg".getBytes("UTF-8")));
+    assertFalse(doTestCompress("".getBytes(StandardCharsets.UTF_8))); // too short
+    assertFalse(doTestCompress("ab1".getBytes(StandardCharsets.UTF_8))); // too short
+    assertFalse(doTestCompress("ab1cdef".getBytes(StandardCharsets.UTF_8))); // too short
+    assertTrue(doTestCompress("ab1cdefg".getBytes(StandardCharsets.UTF_8)));
+    assertFalse(doTestCompress("ab1cdEfg".getBytes(StandardCharsets.UTF_8))); // too many exceptions
+    assertTrue(doTestCompress("ab1cdefg".getBytes(StandardCharsets.UTF_8)));
     // 1 exception, but enough chars to be worth encoding an exception
-    assertTrue(doTestCompress("ab1.dEfg427hiogchio:'nwm un!94twxz".getBytes("UTF-8")));
+    assertTrue(
+        doTestCompress("ab1.dEfg427hiogchio:'nwm un!94twxz".getBytes(StandardCharsets.UTF_8)));
+  }
+
+  // LUCENE-10551
+  public void testNotReallySimple() throws Exception {
+    doTestCompress(
+        "cion1cion_desarrollociones_oraclecionesnaturacionesnatura2tedppsa-integrationdemotiontion cloud gen2tion instance - dev1tion instance - testtion-devbtion-instancetion-prdtion-promerication-qation064533tion535217tion697401tion761348tion892818tion_matrationcauto_simmonsintgic_testtioncloudprodictioncloudservicetiongateway10tioninstance-jtsundatamartprd??o"
+            .getBytes(StandardCharsets.UTF_8));
+  }
+
+  // LUCENE-10551
+  public void testNotReallySimple2() throws Exception {
+    doTestCompress(
+        "analytics-platform-test/koala/cluster-tool:1.0-20220310151438.492,mesh_istio_examples-bookinfo-details-v1:1.16.2mesh_istio_examples-bookinfo-reviews-v3:1.16.2oce-clamav:1.0.219oce-tesseract:1.0.7oce-traefik:2.5.1oci-opensearch:1.2.4.8.103oda-digital-assistant-control-plane-train-pool-workflow-v6:22.02.14oke-coresvcs-k8s-dns-dnsmasq-nanny-amd64@sha256:41aa9160ceeaf712369ddb660d02e5ec06d1679965e6930351967c8cf5ed62d4oke-coresvcs-k8s-dns-kube-dns-amd64@sha256:2cf34b04106974952996c6e [...]
+            .getBytes(StandardCharsets.UTF_8));
   }
 
   public void testFarAwayExceptions() throws Exception {
     String s =
         "01W" + IntStream.range(0, 300).mapToObj(i -> "a").collect(Collectors.joining()) + "W.";
-    assertTrue(doTestCompress(s.getBytes("UTF-8")));
+    assertTrue(doTestCompress(s.getBytes(StandardCharsets.UTF_8)));
   }
 
   public void testRandomAscii() throws IOException {
@@ -118,4 +134,13 @@ public class TestLowercaseAsciiCompression extends LuceneTestCase {
       doTestCompress(bytes, len);
     }
   }
+
+  public void testAsciiCompressionRandom2() throws IOException {
+    int iters = atLeast(1000);
+    for (int iter = 0; iter < iters; ++iter) {
+      doTestCompress(
+          TestUtil.randomSubString(random(), atLeast(400), random().nextBoolean())
+              .getBytes(StandardCharsets.UTF_8));
+    }
+  }
 }