You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2023/11/27 12:23:19 UTC

(james-mime4j) 02/02: Content-Disposition filename Q and UTF-8 encoded.

This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git

commit d5ddd879a7fd4e1e1e7c9edb30850a43031fa7b3
Author: Shamil Vakhitov <sh...@bgerp.org>
AuthorDate: Thu Aug 19 13:25:26 2021 +0500

    Content-Disposition filename Q and UTF-8 encoded.
---
 .../apache/james/mime4j/stream/RawFieldParser.java | 46 ++++++++++++++++++++--
 .../org/apache/james/mime4j/util/CharsetUtil.java  | 17 ++++++++
 .../field/LenientContentDispositionFieldTest.java  | 34 +++++++++++++++-
 3 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
index bb6448b6..e2c099dc 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
@@ -19,11 +19,15 @@
 
 package org.apache.james.mime4j.stream;
 
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.List;
 
 import org.apache.james.mime4j.MimeException;
+import org.apache.james.mime4j.codec.DecodeMonitor;
+import org.apache.james.mime4j.codec.DecoderUtil;
+import org.apache.james.mime4j.util.ByteArrayBuffer;
 import org.apache.james.mime4j.util.ByteSequence;
 import org.apache.james.mime4j.util.CharsetUtil;
 import org.apache.james.mime4j.util.ContentUtil;
@@ -191,6 +195,12 @@ public class RawFieldParser {
      *  is not delimited by any character.
      */
     public String parseValue(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters) {
+        if (!CharsetUtil.isASCII(buf)) {
+            String value = parseUtf8Filename(buf);
+            if (value != null)
+                return value;
+        }
+
         StringBuilder dst = new StringBuilder();
         boolean whitespace = false;
         while (!cursor.atEnd()) {
@@ -219,6 +229,25 @@ public class RawFieldParser {
         return dst.toString();
     }
 
+    /**
+     * Special case for a {@code filename} parameter sent as raw (unencoded) UTF-8 text, for example:
+     * {@code Content-Disposition: attachment; filename="УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ "СТАНЦИЯ ВИРТУАЛЬНАЯ" 01-05-21.pdf"}
+     * The whole buffer is decoded as UTF-8; the text after the first {@code filename="} is returned, minus the final character.
+     * @param buf raw field bytes.
+     * @return the filename value, or {@code null} when {@code filename="} is not found at an index greater than 0.
+     */
+    private String parseUtf8Filename(ByteSequence buf) {
+        final String value = new String(buf.toByteArray(), StandardCharsets.UTF_8);
+
+        final String prefix = "filename=\"";
+        final int pos = value.indexOf(prefix); // case-sensitive search: a spelling such as "FileName=" does not match
+        if (pos > 0) { // NOTE(review): a match at index 0 is rejected as well -- confirm that this is intended
+            return value.substring(pos + prefix.length(), value.length() - 1); // drops the last char, assumed to be the closing quote -- NOTE(review): not verified here
+        }
+        
+        return null;
+    }
+
     /**
      * Skips semantically insignificant whitespace characters and moves the cursor to the closest
      * non-whitespace character.
@@ -384,14 +413,17 @@ public class RawFieldParser {
         }
         pos++;
         indexFrom++;
+
+        ByteArrayBuffer dstRaw = new ByteArrayBuffer(200);
+
         boolean escaped = false;
         for (int i = indexFrom; i < indexTo; i++, pos++) {
             current = (char) (buf.byteAt(i) & 0xff);
             if (escaped) {
                 if (current != '\"' && current != '\\') {
-                    dst.append('\\');
+                    dstRaw.append('\\');
                 }
-                dst.append(current);
+                dstRaw.append(current);
                 escaped = false;
             } else {
                 if (current == '\"') {
@@ -401,10 +433,18 @@ public class RawFieldParser {
                 if (current == '\\') {
                     escaped = true;
                 } else if (current != '\r' && current != '\n') {
-                    dst.append(current);
+                    dstRaw.append(current);
                 }
             }
         }
+
+        String decoded = ContentUtil.decode(dstRaw);
+        if (decoded.startsWith("=?")) {
+            decoded = DecoderUtil.decodeEncodedWords(decoded, DecodeMonitor.STRICT);
+        }
+
+        dst.append(decoded);
+
         cursor.updatePos(pos);
     }
 
diff --git a/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java b/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
index 3504d29f..0a9c983c 100644
--- a/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
+++ b/core/src/main/java/org/apache/james/mime4j/util/CharsetUtil.java
@@ -56,6 +56,23 @@ public class CharsetUtil {
         return (0xFF80 & ch) == 0;
     }
 
+    /**
+     * Returns <code>true</code> if the specified byte sequence consists entirely of
+     * US ASCII characters. An empty sequence yields <code>true</code>.
+     *
+     * @param raw
+     *            byte sequence to test.
+     * @return <code>true</code> if every byte of the sequence is a
+     *         US ASCII character, <code>false</code> otherwise.
+     */
+    public static boolean isASCII(ByteSequence raw) {
+        for (int i = 0; i < raw.length(); i++) {
+            if (!isASCII((char) (raw.byteAt(i) & 0xff))) // treat the byte as unsigned (0..255) before the per-character check
+                return false;
+        }
+        return true;
+    }
+
     /**
      * Returns <code>true</code> if the specified string consists entirely of
      * US ASCII characters.
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java b/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java
index 1031da71..e3ca1c8c 100644
--- a/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/field/LenientContentDispositionFieldTest.java
@@ -19,17 +19,19 @@
 
 package org.apache.james.mime4j.field;
 
+import java.nio.charset.StandardCharsets;
+import java.util.Date;
+
 import org.apache.james.mime4j.MimeException;
 import org.apache.james.mime4j.dom.field.ContentDispositionField;
 import org.apache.james.mime4j.stream.RawField;
 import org.apache.james.mime4j.stream.RawFieldParser;
+import org.apache.james.mime4j.util.ByteArrayBuffer;
 import org.apache.james.mime4j.util.ByteSequence;
 import org.apache.james.mime4j.util.ContentUtil;
 import org.junit.Assert;
 import org.junit.Test;
 
-import java.util.Date;
-
 public class LenientContentDispositionFieldTest {
 
     static ContentDispositionField parse(final String s) throws MimeException {
@@ -38,6 +40,11 @@ public class LenientContentDispositionFieldTest {
         return ContentDispositionFieldLenientImpl.PARSER.parse(rawField, null);
     }
 
+    static ContentDispositionField parse(final byte[] raw) throws MimeException { // byte[] overload: lets a test supply the exact header bytes
+        RawField rawField = RawFieldParser.DEFAULT.parseField(new ByteArrayBuffer(raw, true)); // NOTE(review): 'true' presumably wraps raw without copying -- confirm against ByteArrayBuffer
+        return ContentDispositionFieldLenientImpl.PARSER.parse(rawField, null);
+    }
+
     @Test
     public void testDispositionTypeWithSemiColonNoParams() throws Exception {
         ContentDispositionField f = parse("Content-Disposition: inline;");
@@ -111,6 +118,29 @@ public class LenientContentDispositionFieldTest {
         Assert.assertNull(f.getFilename());
     }
 
+    @Test
+    public void testGetFilenameEncoded() throws Exception { // filename supplied as Q-encoded words inside a quoted string
+        byte[] data = ("Content-Disposition: attachment;\n" +
+            " FileName=\"=?WINDOWS-1251?Q?3244659=5F=C0=EA=F2_=E7=E0_=C8=FE=EB=FC_?=\n" + // first encoded-word; the header is folded right after it
+            " =?WINDOWS-1251?Q?2020.pdf?=\"") // second encoded-word, on the continuation line
+            .getBytes(StandardCharsets.UTF_8);
+        
+        ContentDispositionField f = parse(data);
+
+        Assert.assertEquals("WINDOWS-1251 Q encoded filename", "3244659_Акт за Июль 2020.pdf", f.getFilename()); // both words are expected to decode and join into one name
+    }
+
+    @Test
+    public void testGetFilenameUtf8() throws Exception { // filename supplied as raw UTF-8 bytes rather than encoded-words
+        byte[] data = 
+            "Content-Disposition: attachment; filename=\"УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" 01-05-21.pdf\"" // inner double quotes reach the header without backslash escapes
+            .getBytes(StandardCharsets.UTF_8);
+
+        ContentDispositionField f = parse(data);
+
+        Assert.assertEquals("UTF8 encoded filename", "УПД ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" 01-05-21.pdf", f.getFilename()); // inner quotes are expected to be preserved verbatim
+    }
+
     @Test
     public void testGetCreationDate() throws Exception {
         ContentDispositionField f = parse("Content-Disposition: inline; "


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org