You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/01/24 15:57:03 UTC
(tika) branch main updated: [TIKA-4119]: Return media type "text/javascript" instead of "application/javascript" to follow RFC-9239 (#1556)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new ae737cd26 [TIKA-4119]: Return media type "text/javascript" instead of "application/javascript" to follow RFC-9239 (#1556)
ae737cd26 is described below
commit ae737cd2625b5e2659c20c27713785df8bfc1957
Author: Marcos Pereira <ma...@gmail.com>
AuthorDate: Wed Jan 24 10:56:58 2024 -0500
[TIKA-4119]: Return media type "text/javascript" instead of "application/javascript" to follow RFC-9239 (#1556)
* [TIKA-4119]: Return media type "text/javascript" instead of "application/javascript"
Following RFC 9239. This also adds support for ".mjs" (as documented in the RFC).
---
CHANGES.txt | 3 ++
README.md | 6 +--
.../org/apache/tika/mime/tika-mimetypes.xml | 9 +++--
.../java/org/apache/tika/TikaDetectionTest.java | 3 +-
.../org/apache/tika/mime/MimeTypesReaderTest.java | 43 ++++++++++++++--------
.../tika/mime/ProbabilisticMimeDetectionTest.java | 2 +-
.../java/org/apache/tika/mime/TestMimeTypes.java | 12 +++---
7 files changed, 48 insertions(+), 30 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 9953e46d3..f9ac540e6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -16,6 +16,9 @@ Release 3.0.0-BETA - 12/01/2023
* Tika will look for "custom-mimetypes.xml" directly on the classpath, NOT
under "/org/apache/tika/mime/". (TIKA-4147).
+ * Return media type "text/javascript" instead of "application/javascript"
+ to follow RFC-9239. (TIKA-4119).
+
Other Changes/Updates
* Upgrade PDFBox to 3.0.1 (TIKA-3347)
diff --git a/README.md b/README.md
index 97d709656..88fe59cb8 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Tika jars can be fetched from Maven Central or your favourite Maven mirror.
**Tika 1.X reached End of Life (EOL) on September 30, 2022.**
-Tika is based on **Java 8** and uses the [Maven 3](https://maven.apache.org) build system.
+Tika is based on **Java 11** and uses the [Maven 3](https://maven.apache.org) build system.
**N.B.** [Docker](https://www.docker.com/products/personal) is used for tests in tika-integration-tests.
As of Tika 2.5.1, if Docker is not installed, those tests are skipped. Docker is required for a successful
build on earlier 2.x versions.
@@ -50,7 +50,7 @@ Maven Dependencies
Apache Tika provides *Bill of Material* (BOM) artifact to align Tika module versions and simplify version management.
To avoid convergence errors in your own project, import this
-bom or Tika's parent pom.xml in your dependencey management section.
+bom or Tika's parent pom.xml in your dependency management section.
If you use Apache Maven:
@@ -170,7 +170,7 @@ Notification on all code changes are sent to the following mailing list:
The mailing lists are open to anyone and publicly archived.
You can subscribe the mailing lists by sending a message to
-[LIST]-subscribe@tika.apache.org (for example user-subscribe@...).
+[LIST]-subscribe@tika.apache.org (for example, user-subscribe@...).
To unsubscribe, send a message to [LIST]-unsubscribe@tika.apache.org.
For more instructions, send a message to [LIST]-help@tika.apache.org.
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index b76adebd1..54f7cc6f6 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -366,15 +366,18 @@
<glob pattern="*.ser"/>
</mime-type>
- <mime-type type="application/javascript">
+ <mime-type type="text/javascript">
+ <alias type="application/javascript"/>
<alias type="application/x-javascript"/>
- <alias type="text/javascript"/>
<sub-class-of type="text/plain"/>
<_comment>JavaScript Source Code</_comment>
+ <!-- From RFC 9239: https://www.rfc-editor.org/rfc/rfc9239.html#name-text-javascript -->
+ <!-- File extension(s): .js, .mjs -->
<glob pattern="*.js"/>
+ <glob pattern="*.mjs"/>
<!-- Note - there is no Unique Magic for JavaScript files! -->
- <!-- Generally you can only detect JS with the filename -->
+ <!-- Generally, you can only detect JS with the filename -->
<!-- However... A few common JS libraries accidentally trigger -->
<!-- the HTML priority=20 magic incorrectly. So, for those only, -->
<!-- we list "magic" for those specific files -->
diff --git a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
index 1cd0f40a2..215865886 100644
--- a/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/TikaDetectionTest.java
@@ -56,7 +56,8 @@ public class TikaDetectionTest {
assertEquals("application/java-archive", tika.detect("x.jar"));
assertEquals("application/java-serialized-object", tika.detect("x.ser"));
assertEquals("application/java-vm", tika.detect("x.class"));
- assertEquals("application/javascript", tika.detect("x.js"));
+ assertEquals("text/javascript", tika.detect("x.js"));
+ assertEquals("text/javascript", tika.detect("x.mjs"));
assertEquals("application/json", tika.detect("x.json"));
assertEquals("application/lost+xml", tika.detect("x.lostxml"));
assertEquals("application/mac-binhex40", tika.detect("x.hqx"));
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
index 7d576d73a..0d904f6df 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeTypesReaderTest.java
@@ -31,6 +31,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Executors;
+import java.util.stream.Collectors;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
@@ -279,6 +280,24 @@ public class MimeTypesReaderTest {
assertEquals(".ppt", mt.getExtensions().get(0));
}
+ @Test
+ public void testGetExtensionForJavaScript() throws Exception {
+ MimeType mt = this.mimeTypes.forName("text/javascript");
+ assertEquals(".js", mt.getExtension());
+ assertEquals(List.of(".js", ".mjs"), mt.getExtensions());
+ }
+
+ @Test
+ public void testGetAliasForJavaScript() throws Exception {
+ MimeType mt = this.mimeTypes.forName("text/javascript");
+ Set<String> aliases = mimeTypes.getMediaTypeRegistry()
+ .getAliases(mt.getType())
+ .stream()
+ .map(MediaType::toString)
+ .collect(Collectors.toSet());
+ assertEquals(Set.of("application/javascript", "application/x-javascript"), aliases);
+ }
+
@Test
public void testGetRegisteredMimesWithParameters() throws Exception {
//TIKA-1692
@@ -351,40 +370,32 @@ public class MimeTypesReaderTest {
}
@Test
- public void testBadMinShouldMatch1() throws Exception {
+ public void testBadMinShouldMatch1() {
System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP,
"src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch1.xml");
- assertThrows(IllegalArgumentException.class, () -> {
- MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
- });
+ assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader()));
}
@Test
- public void testBadMinShouldMatch2() throws Exception {
+ public void testBadMinShouldMatch2() {
System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP,
"src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch2.xml");
- assertThrows(IllegalArgumentException.class, () -> {
- MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
- });
+ assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader()));
}
@Test
- public void testBadMinShouldMatch3() throws Exception {
+ public void testBadMinShouldMatch3() {
System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP,
"src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch3.xml");
- assertThrows(IllegalArgumentException.class, () -> {
- MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
- });
+ assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader()));
}
@Test
- public void testBadMinShouldMatch4() throws Exception {
+ public void testBadMinShouldMatch4() {
System.setProperty(MimeTypesFactory.CUSTOM_MIMES_SYS_PROP,
"src/test/resources/org/apache/tika/mime/custom-mimetypes-badMinShouldMatch4.xml");
- assertThrows(IllegalArgumentException.class, () -> {
- MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(new CustomClassLoader());
- });
+ assertThrows(IllegalArgumentException.class, () -> MimeTypes.getDefaultMimeTypes(new CustomClassLoader()));
}
private static class CustomClassLoader extends ClassLoader {
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
index 437cefc6d..d9a65bf1b 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
@@ -246,6 +246,6 @@ public class ProbabilisticMimeDetectionTest {
"} catch (e) {\n" + " console.log(e);\n" + "}")
.getBytes(StandardCharsets.UTF_8));
MediaType detect = new ProbabilisticMimeDetectionSelector().detect(input, metadata);
- assertEquals(MediaType.application("javascript"), detect);
+ assertEquals(MediaType.text("javascript"), detect);
}
}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
index 3dad7d6af..1b66a7efe 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/mime/TestMimeTypes.java
@@ -718,8 +718,8 @@ public class TestMimeTypes {
assertTypeByName("text/html", "testHTML.html");
assertType("text/html", "testHTML.html");
- assertTypeByName("application/javascript", "testJS.js");
- assertType("application/javascript", "testJS.js");
+ assertTypeByName("text/javascript", "testJS.js");
+ assertType("text/javascript", "testJS.js");
assertType("text/vnd.graphviz", "testGRAPHVIZd.dot");
assertType("text/vnd.graphviz", "testGRAPHVIZg.dot");
@@ -1148,10 +1148,10 @@ public class TestMimeTypes {
assertTypeByData("text/x-matlab", "testMATLAB_barcast.m");
// By name, or by name+data, gets it as JS
- assertTypeByName("application/javascript", "testJS.js");
- assertTypeByName("application/javascript", "testJS_HTML.js");
- assertType("application/javascript", "testJS.js");
- assertType("application/javascript", "testJS_HTML.js");
+ assertTypeByName("text/javascript", "testJS.js");
+ assertTypeByName("text/javascript", "testJS_HTML.js");
+ assertType("text/javascript", "testJS.js");
+ assertType("text/javascript", "testJS_HTML.js");
// With data only, because we have no JS file magic, can't be
// detected. One will come through as plain text, the other