You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2023/11/27 12:23:19 UTC
(james-mime4j) 02/02: Content-Disposition filename Q and UTF-8 encoded.
This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
commit d5ddd879a7fd4e1e1e7c9edb30850a43031fa7b3
Author: Shamil Vakhitov <sh...@bgerp.org>
AuthorDate: Thu Aug 19 13:25:26 2021 +0500
Content-Disposition filename Q and UTF-8 encoded.
---
.../apache/james/mime4j/stream/RawFieldParser.java | 46 ++++++++++++++++++++--
.../org/apache/james/mime4j/util/CharsetUtil.java | 17 ++++++++
.../field/LenientContentDispositionFieldTest.java | 34 +++++++++++++++-
3 files changed, 92 insertions(+), 5 deletions(-)
diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
index bb6448b6..e2c099dc 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
@@ -19,11 +19,15 @@
package org.apache.james.mime4j.stream;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import org.apache.james.mime4j.MimeException;
+import org.apache.james.mime4j.codec.DecodeMonitor;
+import org.apache.james.mime4j.codec.DecoderUtil;
+import org.apache.james.mime4j.util.ByteArrayBuffer;
import org.apache.james.mime4j.util.ByteSequence;
import org.apache.james.mime4j.util.CharsetUtil;
import org.apache.james.mime4j.util.ContentUtil;
@@ -191,6 +195,12 @@ public class RawFieldParser {
* is not delimited by any character.
*/
public String parseValue(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
+ if (!CharsetUtil.isASCII(buf)) {
+ String value = parseUtf8Filename(buf);
+ if (value != null)
+ return value;
+ }
+
StringBuilder dst = new StringBuilder();
boolean whitespace = false;
while (!cursor.atEnd()) {
@@ -219,6 +229,25 @@ public class RawFieldParser {
return dst.toString();
}
+ /**
+  * Special case for parsing a {@code filename} attribute sent in a nonstandard encoding
+  * (raw UTF-8 bytes, possibly with unescaped inner quotes), like:
+  * {@code Content-Disposition: attachment; filename="УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ "СТАНЦИЯ ВИРТУАЛЬНАЯ" 01-05-21.pdf"}
+  * <p>
+  * The buffer is decoded as UTF-8. The returned value is the text between the opening
+  * quote of the first {@code filename="} occurrence (matched case-insensitively, since
+  * MIME parameter names are case-insensitive) and the <em>last</em> double quote in the
+  * buffer. Using the last quote keeps unescaped inner quotes as part of the name and
+  * ignores any line terminator or whitespace that follows the closing quote.
+  *
+  * @param buf raw field bytes.
+  * @return the filename value, or {@code null} if no {@code filename="} prefix is present
+  *         or no closing quote follows it, so that the caller can fall back to the
+  *         regular parsing path.
+  */
+ private String parseUtf8Filename(ByteSequence buf) {
+     final String value = new String(buf.toByteArray(), StandardCharsets.UTF_8);
+
+     final String prefix = "filename=\"";
+     final int prefixLength = prefix.length();
+
+     // Case-insensitive search done with regionMatches rather than toLowerCase(),
+     // because lower-casing can change the string length and shift the indexes.
+     int start = -1;
+     for (int i = 0; i + prefixLength <= value.length(); i++) {
+         if (value.regionMatches(true, i, prefix, 0, prefixLength)) {
+             start = i + prefixLength;
+             break;
+         }
+     }
+     if (start < 0) {
+         return null;
+     }
+
+     // The closing quote is the last quote of the buffer; when the only quote left is
+     // the opening one (end < start) the value is unterminated and is not handled here.
+     final int end = value.lastIndexOf('"');
+     if (end < start) {
+         return null;
+     }
+
+     return value.substring(start, end);
+ }
+
/**
* Skips semantically insignificant whitespace characters and moves the cursor to the closest
* non-whitespace character.
@@ -384,14 +413,17 @@ public class RawFieldParser {
}
pos++;
indexFrom++;
+
+ ByteArrayBuffer dstRaw = new ByteArrayBuffer(200);
+
boolean escaped = false;
for (int i = indexFrom; i < indexTo; i++, pos++) {
current = (char) (buf.byteAt(i) & 0xff);
if (escaped) {
if (current != '\"' && current != '\\') {
- dst.append('\\');
+ dstRaw.append('\\');
}
- dst.append(current);
+ dstRaw.append(current);
escaped = false;
} else {
if (current == '\"') {
@@ -401,10 +433,18 @@ public class RawFieldParser {
if (current == '\\') {
escaped = true;
} else if (current != '\r' && current != '\n') {
- dst.append(current);
+ dstRaw.append(current);
}
}
}
+
+ String decoded = ContentUtil.decode(dstRaw);
+ if (decoded.startsWith("=?")) {
+ decoded = DecoderUtil.decodeEncodedWords(decoded, DecodeMonitor.STRICT);
+ }
+
+ dst.append(decoded);
+
cursor.updatePos(pos);
}
diff --git a/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java b/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
index 3504d29f..0a9c983c 100644
--- a/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
+++ b/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
@@ -56,6 +56,23 @@ public class CharsetUtil {
return (0xFF80 & ch) == 0;
}
+ /**
+ * Returns <code>true</code> if the specified byte array consists entirely of
+ * US ASCII characters.
+ *
+ * @param raw
+ * byte array to test.
+ * @return <code>true</code> if the specified string consists entirely of
+ * US ASCII characters, <code>false</code> otherwise.
+ */
+ public static boolean isASCII(ByteSequence raw) {
+ for (int i = 0; i < raw.length(); i++) {
+ if (!isASCII((char) (raw.byteAt(i) & 0xff)))
+ return false;
+ }
+ return true;
+ }
+
/**
* Returns <code>true</code> if the specified string consists entirely of
* US ASCII characters.
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java b/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java
index 1031da71..e3ca1c8c 100644
--- a/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java
@@ -19,17 +19,19 @@
package org.apache.james.mime4j.field;
+import java.nio.charset.StandardCharsets;
+import java.util.Date;
+
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.dom.field.ContentDispositionField;
import org.apache.james.mime4j.stream.RawField;
import org.apache.james.mime4j.stream.RawFieldParser;
+import org.apache.james.mime4j.util.ByteArrayBuffer;
import org.apache.james.mime4j.util.ByteSequence;
import org.apache.james.mime4j.util.ContentUtil;
import org.junit.Assert;
import org.junit.Test;
-import java.util.Date;
-
public class LenientContentDispositionFieldTest {
static ContentDispositionField parse(final String s) throws MimeException {
@@ -38,6 +40,11 @@ public class LenientContentDispositionFieldTest {
return ContentDispositionFieldLenientImpl.PARSER.parse(rawField, null);
}
+ static ContentDispositionField parse(final byte[] raw) throws MimeException {
+ RawField rawField = RawFieldParser.DEFAULT.parseField(new ByteArrayBuffer(raw, true));
+ return ContentDispositionFieldLenientImpl.PARSER.parse(rawField, null);
+ }
+
@Test
public void testDispositionTypeWithSemiColonNoParams() throws Exception {
ContentDispositionField f = parse("Content-Disposition: inline;");
@@ -111,6 +118,29 @@ public class LenientContentDispositionFieldTest {
Assert.assertNull(f.getFilename());
}
+ /**
+  * A quoted filename made of two WINDOWS-1251 Q-encoded words, split across folded
+  * header lines, must be decoded into a single readable name.
+  */
+ @Test
+ public void testGetFilenameEncoded() throws Exception {
+     final String header = "Content-Disposition: attachment;\n"
+             + " FileName=\"=?WINDOWS-1251?Q?3244659=5F=C0=EA=F2_=E7=E0_=C8=FE=EB=FC_?=\n"
+             + " =?WINDOWS-1251?Q?2020.pdf?=\"";
+     final String expectedName = "3244659_Акт за Июль 2020.pdf";
+
+     final ContentDispositionField field = parse(header.getBytes(StandardCharsets.UTF_8));
+
+     Assert.assertEquals("WINDOWS-1251 Q encoded filename", expectedName, field.getFilename());
+ }
+
+ /**
+  * A filename sent as raw UTF-8 bytes, containing unescaped inner quotes, must be
+  * returned verbatim (inner quotes included).
+  */
+ @Test
+ public void testGetFilenameUtf8() throws Exception {
+     final String header =
+             "Content-Disposition: attachment; filename=\"УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" 01-05-21.pdf\"";
+     final String expectedName = "УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" 01-05-21.pdf";
+
+     final ContentDispositionField field = parse(header.getBytes(StandardCharsets.UTF_8));
+
+     Assert.assertEquals("UTF8 encoded filename", expectedName, field.getFilename());
+ }
+
@Test
public void testGetCreationDate() throws Exception {
ContentDispositionField f = parse("Content-Disposition: inline; "
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org