You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/05/12 19:41:46 UTC
[tika] branch main updated: TIKA-3756
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new adf8dacf7 TIKA-3756
adf8dacf7 is described below
commit adf8dacf7e9a80eb2b713791fa9a50bb870b321d
Author: tallison <ta...@apache.org>
AuthorDate: Thu May 12 15:41:30 2022 -0400
TIKA-3756
---
tika-langdetect/pom.xml | 12 ++++++---
.../lingo24/Lingo24LangDetectorTest.java | 10 +++----
.../langdetect/mitll/TextLangDetectorTest.java | 8 +++---
.../langdetect/opennlp/OpenNLPDetectorTest.java | 8 +++---
.../optimaize/OptimaizeLangDetectorTest.java | 31 ++++++++++++----------
.../langdetect/tika/LanguageIdentifierTest.java | 18 ++++++-------
.../tika/langdetect/tika/LanguageProfileTest.java | 6 ++---
.../tika/LanguageProfilerBuilderTest.java | 16 +++++------
.../tika/langdetect/tika/ProfilingWriterTest.java | 4 +--
9 files changed, 61 insertions(+), 52 deletions(-)
diff --git a/tika-langdetect/pom.xml b/tika-langdetect/pom.xml
index 843a84d33..bb546200d 100644
--- a/tika-langdetect/pom.xml
+++ b/tika-langdetect/pom.xml
@@ -47,10 +47,16 @@
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
- <!-- after we migrate everything to junit5, we can get rid of this -->
+ <!-- test dependencies -->
<dependency>
- <groupId>org.junit.vintage</groupId>
- <artifactId>junit-vintage-engine</artifactId>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-api</artifactId>
+ <version>${junit5.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.junit.jupiter</groupId>
+ <artifactId>junit-jupiter-engine</artifactId>
<version>${junit5.version}</version>
<scope>test</scope>
</dependency>
diff --git a/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java b/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java
index 94cc2f1e0..ec5501bc6 100644
--- a/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-lingo24/src/test/java/org/apache/tika/langdetect/lingo24/Lingo24LangDetectorTest.java
@@ -16,17 +16,17 @@
*/
package org.apache.tika.langdetect.lingo24;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assume.assumeTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.commons.io.IOUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
import org.apache.tika.langdetect.LanguageDetectorTest;
import org.apache.tika.language.detect.LanguageDetector;
diff --git a/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java b/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java
index 49d995f94..09b4eb07d 100644
--- a/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-mitll-text/src/test/java/org/apache/tika/langdetect/mitll/TextLangDetectorTest.java
@@ -16,15 +16,15 @@
*/
package org.apache.tika.langdetect.mitll;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assume.assumeTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assumptions.assumeTrue;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.commons.io.IOUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
import org.apache.tika.langdetect.LanguageDetectorTest;
import org.apache.tika.language.detect.LanguageDetector;
diff --git a/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java b/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java
index 2ba40c30f..b84fba9ef 100644
--- a/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-opennlp/src/test/java/org/apache/tika/langdetect/opennlp/OpenNLPDetectorTest.java
@@ -16,7 +16,7 @@
*/
package org.apache.tika.langdetect.opennlp;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.io.InputStreamReader;
@@ -27,8 +27,8 @@ import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
import org.apache.tika.langdetect.LanguageDetectorTest;
import org.apache.tika.language.detect.LanguageResult;
@@ -37,7 +37,7 @@ public class OpenNLPDetectorTest {
static Map<String, String> OPTIMAIZE_TO_OPENNLP = new HashMap<>();
- @BeforeClass
+ @BeforeAll
public static void setUp() {
OPTIMAIZE_TO_OPENNLP.put("da", "dan");
OPTIMAIZE_TO_OPENNLP.put("de", "deu");
diff --git a/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java b/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java
index 6a7316256..903bdef6a 100644
--- a/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-optimaize/src/test/java/org/apache/tika/langdetect/optimaize/OptimaizeLangDetectorTest.java
@@ -16,10 +16,10 @@
*/
package org.apache.tika.langdetect.optimaize;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@@ -29,7 +29,8 @@ import java.util.Locale;
import java.util.Map;
import org.apache.commons.io.IOUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
import org.apache.tika.langdetect.LanguageDetectorTest;
import org.apache.tika.language.detect.LanguageConfidence;
@@ -201,9 +202,9 @@ public class OptimaizeLangDetectorTest extends LanguageDetectorTest {
if (results.size() > 0) {
LanguageResult result = results.get(0);
- assertFalse(
+ assertFalse(result.isReasonablyCertain(),
"mix of " + language + " and " + other + " incorrectly detected as " +
- result, result.isReasonablyCertain());
+ result);
}
}
}
@@ -235,14 +236,15 @@ public class OptimaizeLangDetectorTest extends LanguageDetectorTest {
writeTo(language, writer, 300);
LanguageResult result = detector.detect();
- assertNotNull(String.format(Locale.US, "Language '%s' wasn't detected", language),
- result);
+ assertNotNull(result, String.format(Locale.US, "Language '%s' wasn't detected",
+ language));
- assertTrue(String.format(Locale.US, "Language '%s' was detected as '%s'", language,
- result.getLanguage()), result.isLanguage(language));
- assertTrue(
+ assertTrue(result.isLanguage(language), String.format(Locale.US, "Language '%s' was " +
+ "detected as '%s'", language,
+ result.getLanguage()));
+ assertTrue(result.isReasonablyCertain(),
String.format(Locale.US, "Language '%s' isn't reasonably certain: %s", language,
- result.getConfidence()), result.isReasonablyCertain());
+ result.getConfidence()));
}
writer.close();
@@ -270,7 +272,8 @@ public class OptimaizeLangDetectorTest extends LanguageDetectorTest {
return result;
}
- @Test(timeout = 5000)
+ @Test
+ @Timeout(value = 5000, unit = java.util.concurrent.TimeUnit.MILLISECONDS)
public void testOptimaizeRegexBug() throws Exception {
//confirm TIKA-2777 doesn't affect langdetect's Optimaize
LanguageDetector detector = new OptimaizeLangDetector().setShortText(false).loadModels();
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java
index 0e4729356..15c190cc0 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageIdentifierTest.java
@@ -17,9 +17,9 @@
package org.apache.tika.langdetect.tika;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
import java.io.InputStream;
@@ -29,8 +29,8 @@ import java.util.HashMap;
import java.util.Locale;
import org.apache.commons.io.IOUtils;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
/**
* JUnit based test of class {@link LanguageIdentifier}.
@@ -45,7 +45,7 @@ public class LanguageIdentifierTest {
// Enable when language detection works better.
"da", "de", /* "et", "el", */ "en", "es", "fi", "fr", "it", "lt", "nl", "pt", "sv"};
- @Before
+ @BeforeEach
public void setUp() {
LanguageIdentifier.initProfiles();
}
@@ -60,7 +60,7 @@ public class LanguageIdentifierTest {
assertEquals(language, identifier.getLanguage());
// Lithuanian is detected but isn't reasonably certain:
if (!language.equals("lt")) {
- assertTrue(identifier.toString(), identifier.isReasonablyCertain());
+ assertTrue(identifier.isReasonablyCertain(), identifier.toString());
}
}
}
@@ -157,9 +157,9 @@ public class LanguageIdentifierTest {
writeTo(other, writer);
LanguageIdentifier identifier = null;
identifier = new LanguageIdentifier(writer.getProfile());
- assertFalse(
+ assertFalse(identifier.isReasonablyCertain(),
"mix of " + language + " and " + other + " incorrectly detected as " +
- identifier, identifier.isReasonablyCertain());
+ identifier);
}
}
}
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java
index a2820e0f7..612df7c52 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfileTest.java
@@ -16,12 +16,12 @@
*/
package org.apache.tika.langdetect.tika;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.IOException;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
public class LanguageProfileTest {
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java
index 371b2c328..9f3e907dc 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/LanguageProfilerBuilderTest.java
@@ -18,8 +18,8 @@
package org.apache.tika.langdetect.tika;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.BufferedReader;
import java.io.IOException;
@@ -30,9 +30,9 @@ import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
import org.apache.tika.exception.TikaException;
@@ -44,20 +44,20 @@ public class LanguageProfilerBuilderTest {
String profileName = "test-profile";
private Path tmpProfileModel;
- @Before
+ @BeforeEach
public void setUp() throws Exception {
tmpProfileModel = Files.createTempFile("tika-lang", ".ngp");
try (InputStream is = LanguageProfilerBuilderTest.class.getResourceAsStream(corpusName)) {
LanguageProfilerBuilder ngramProfileBuilder =
LanguageProfilerBuilder.create(profileName, is, UTF_8.name());
try (OutputStream os = Files.newOutputStream(tmpProfileModel)) {
- ngramProfileBuilder.save(os);
+ ngramProfileBuilder.save(os);
assertEquals(maxlen, ngramProfileBuilder.getSorted().size());
}
}
}
- @After
+ @AfterEach
public void tearDown() throws Exception {
if (Files.isRegularFile(tmpProfileModel)) {
Files.delete(tmpProfileModel);
diff --git a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java
index a9c2dc567..beb7c2c66 100644
--- a/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java
+++ b/tika-langdetect/tika-langdetect-tika/src/test/java/org/apache/tika/langdetect/tika/ProfilingWriterTest.java
@@ -16,11 +16,11 @@
*/
package org.apache.tika.langdetect.tika;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
@Deprecated
public class ProfilingWriterTest {