You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/12 19:41:46 UTC

[tika] branch main updated: TIKA-3756

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new adf8dacf7 TIKA-3756
adf8dacf7 is described below

commit adf8dacf7e9a80eb2b713791fa9a50bb870b321d
Author: tallison <ta...@apache.org>
AuthorDate: Thu May 12 15:41:30 2022 -0400

    TIKA-3756
---
 tika-langdetect/pom.xml                            | 12 ++++++---
 .../lingo24/Lingo24LangDetectorTest.java           | 10 +++----
 .../langdetect/mitll/TextLangDetectorTest.java     |  8 +++---
 .../langdetect/opennlp/OpenNLPDetectorTest.java    |  8 +++---
 .../optimaize/OptimaizeLangDetectorTest.java       | 31 ++++++++++++----------
 .../langdetect/tika/LanguageIdentifierTest.java    | 18 ++++++-------
 .../tika/langdetect/tika/LanguageProfileTest.java  |  6 ++---
 .../tika/LanguageProfilerBuilderTest.java          | 16 +++++------
 .../tika/langdetect/tika/ProfilingWriterTest.java  |  4 +--
 9 files changed, 61 insertions(+), 52 deletions(-)

diff --git a/tika-langdetect/pom.xml b/tika-langdetect/pom.xml
index 843a84d33..bb546200d 100644
--- a/tika-langdetect/pom.xml
+++ b/tika-langdetect/pom.xml
@@ -47,10 +47,16 @@
       <version>${project.version}</version>
       <scope>provided</scope>
     </dependency>
-    <!-- after we migrate everything to junit5, we can get rid of this -->
+    <!-- test dependencies -->
     <dependency>
-      <groupId>org.junit.vintage</groupId>
-      <artifactId>junit-vintage-engine</artifactId>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-api</artifactId>
+      <version>${junit5.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-engine</artifactId>
       <version>${junit5.version}</version>
       <scope>test</scope>
     </dependency>
diff --git a/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java b/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java
index 94cc2f1e0..ec5501bc6 100644
--- a/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java
@@ -16,17 +16,17 @@
  */
 package org.apache.tika.langdetect.lingo24;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assume.assumeTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 
 import org.apache.commons.io.IOUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import org.apache.tika.langdetect.LanguageDetectorTest;
 import org.apache.tika.language.detect.LanguageDetector;
diff --git a/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java b/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java
index 49d995f94..09b4eb07d 100644
--- a/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java
@@ -16,15 +16,15 @@
  */
 package org.apache.tika.langdetect.mitll;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assume.assumeTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
 import java.nio.charset.StandardCharsets;
 import java.util.List;
 
 import org.apache.commons.io.IOUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import org.apache.tika.langdetect.LanguageDetectorTest;
 import org.apache.tika.language.detect.LanguageDetector;
diff --git a/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java b/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java
index 2ba40c30f..b84fba9ef 100644
--- a/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java
@@ -16,7 +16,7 @@
  */
 package org.apache.tika.langdetect.opennlp;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.io.IOException;
 import java.io.InputStreamReader;
@@ -27,8 +27,8 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.io.IOUtils;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import org.apache.tika.langdetect.LanguageDetectorTest;
 import org.apache.tika.language.detect.LanguageResult;
@@ -37,7 +37,7 @@ public class OpenNLPDetectorTest {
 
     static Map<String, String> OPTIMAIZE_TO_OPENNLP = new HashMap<>();
 
-    @BeforeClass
+    @BeforeAll
     public static void setUp() {
         OPTIMAIZE_TO_OPENNLP.put("da", "dan");
         OPTIMAIZE_TO_OPENNLP.put("de", "deu");
diff --git a/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java b/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java
index 6a7316256..903bdef6a 100644
--- a/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java
@@ -16,10 +16,10 @@
  */
 package org.apache.tika.langdetect.optimaize;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -29,7 +29,8 @@ import java.util.Locale;
 import java.util.Map;
 
 import org.apache.commons.io.IOUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
 
 import org.apache.tika.langdetect.LanguageDetectorTest;
 import org.apache.tika.language.detect.LanguageConfidence;
@@ -201,9 +202,9 @@ public class OptimaizeLangDetectorTest extends LanguageDetectorTest {
                 if (results.size() > 0) {
                     LanguageResult result = results.get(0);
 
-                    assertFalse(
+                    assertFalse(result.isReasonablyCertain(),
                             "mix of " + language + " and " + other + " incorrectly detected as " +
-                                    result, result.isReasonablyCertain());
+                                    result);
                 }
             }
         }
@@ -235,14 +236,15 @@ public class OptimaizeLangDetectorTest extends LanguageDetectorTest {
             writeTo(language, writer, 300);
 
             LanguageResult result = detector.detect();
-            assertNotNull(String.format(Locale.US, "Language '%s' wasn't detected", language),
-                    result);
+            assertNotNull(result, String.format(Locale.US, "Language '%s' wasn't detected",
+                            language));
 
-            assertTrue(String.format(Locale.US, "Language '%s' was detected as '%s'", language,
-                    result.getLanguage()), result.isLanguage(language));
-            assertTrue(
+            assertTrue(result.isLanguage(language), String.format(Locale.US, "Language '%s' was " +
+                            "detected as '%s'", language,
+                    result.getLanguage()));
+            assertTrue(result.isReasonablyCertain(),
                     String.format(Locale.US, "Language '%s' isn't reasonably certain: %s", language,
-                            result.getConfidence()), result.isReasonablyCertain());
+                            result.getConfidence()));
         }
 
         writer.close();
@@ -270,7 +272,8 @@ public class OptimaizeLangDetectorTest extends LanguageDetectorTest {
         return result;
     }
 
-    @Test(timeout = 5000)
+    @Test
+    @Timeout(5) // seconds (JUnit 5 default unit) == the former JUnit 4 timeout of 5000 ms
     public void testOptimaizeRegexBug() throws Exception {
         //confirm TIKA-2777 doesn't affect langdetect's Optimaize
         LanguageDetector detector = new OptimaizeLangDetector().setShortText(false).loadModels();
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java
index 0e4729356..15c190cc0 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java
@@ -17,9 +17,9 @@
 package org.apache.tika.langdetect.tika;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -29,8 +29,8 @@ import java.util.HashMap;
 import java.util.Locale;
 
 import org.apache.commons.io.IOUtils;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * JUnit based test of class {@link LanguageIdentifier}.
@@ -45,7 +45,7 @@ public class LanguageIdentifierTest {
             // Enable when language detection works better.
             "da", "de", /* "et", "el", */ "en", "es", "fi", "fr", "it", "lt", "nl", "pt", "sv"};
 
-    @Before
+    @BeforeEach
     public void setUp() {
         LanguageIdentifier.initProfiles();
     }
@@ -60,7 +60,7 @@ public class LanguageIdentifierTest {
             assertEquals(language, identifier.getLanguage());
             // Lithuanian is detected but isn't reasonably certain:
             if (!language.equals("lt")) {
-                assertTrue(identifier.toString(), identifier.isReasonablyCertain());
+                assertTrue(identifier.isReasonablyCertain(), identifier.toString());
             }
         }
     }
@@ -157,9 +157,9 @@ public class LanguageIdentifierTest {
                     writeTo(other, writer);
                     LanguageIdentifier identifier = null;
                     identifier = new LanguageIdentifier(writer.getProfile());
-                    assertFalse(
+                    assertFalse(identifier.isReasonablyCertain(),
                             "mix of " + language + " and " + other + " incorrectly detected as " +
-                                    identifier, identifier.isReasonablyCertain());
+                                    identifier);
                 }
             }
         }
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java
index a2820e0f7..612df7c52 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java
@@ -16,12 +16,12 @@
  */
 package org.apache.tika.langdetect.tika;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.IOException;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class LanguageProfileTest {
 
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java
index 371b2c328..9f3e907dc 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java
@@ -18,8 +18,8 @@
 package org.apache.tika.langdetect.tika;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -30,9 +30,9 @@ import java.net.URISyntaxException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import org.apache.tika.exception.TikaException;
 
@@ -44,20 +44,20 @@ public class LanguageProfilerBuilderTest {
     String profileName = "test-profile";
     private Path tmpProfileModel;
 
-    @Before
+    @BeforeEach
     public void setUp() throws Exception {
         tmpProfileModel = Files.createTempFile("tika-lang", ".ngp");
         try (InputStream is = LanguageProfilerBuilderTest.class.getResourceAsStream(corpusName)) {
             LanguageProfilerBuilder ngramProfileBuilder =
                     LanguageProfilerBuilder.create(profileName, is, UTF_8.name());
             try (OutputStream os = Files.newOutputStream(tmpProfileModel)) {
                 ngramProfileBuilder.save(os);
                 assertEquals(maxlen, ngramProfileBuilder.getSorted().size());
             }
         }
     }
 
-    @After
+    @AfterEach
     public void tearDown() throws Exception {
         if (Files.isRegularFile(tmpProfileModel)) {
             Files.delete(tmpProfileModel);
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java
index a9c2dc567..beb7c2c66 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java
@@ -16,11 +16,11 @@
  */
 package org.apache.tika.langdetect.tika;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.io.IOException;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 @Deprecated
 public class ProfilingWriterTest {