You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2023/11/27 12:23:18 UTC

(james-mime4j) 01/02: RawField.getBody() in UTF-8

This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git

commit 8122e4b901274b2e377f2642b4874951a8f6fea1
Author: Shamil Vakhitov <sh...@bgerp.org>
AuthorDate: Wed Aug 18 20:25:15 2021 +0500

    RawField.getBody() in UTF-8
---
 .../org/apache/james/mime4j/stream/RawField.java   |  3 ++-
 .../org/apache/james/mime4j/util/ContentUtil.java  | 31 ++++++++++++++++++++++
 .../james/mime4j/field/UnstructuredFieldTest.java  | 14 ++++++++++
 .../james/mime4j/message/HeaderImplTest.java       |  9 +++++--
 4 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawField.java b/core/src/main/java/org/apache/james/mime4j/stream/RawField.java
index 8bcaa773..e64c6a73 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawField.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawField.java
@@ -19,6 +19,7 @@
 
 package org.apache.james.mime4j.stream;
 
+import java.nio.charset.StandardCharsets;
 import java.util.Locale;
 
 import org.apache.james.mime4j.util.ByteSequence;
@@ -83,7 +84,7 @@ public final class RawField implements Field {
             if (len > off + 1 && (CharsetUtil.isWhitespace((char) (raw.byteAt(off) & 0xff)))) {
                 off++;
             }
-            return MimeUtil.unfold(ContentUtil.decode(raw, off, len - off));
+            return MimeUtil.unfold(ContentUtil.decode(raw, off, len - off, StandardCharsets.UTF_8));
         }
         return null;
     }
diff --git a/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java b/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java
index d6d2d96e..1e078120 100644
--- a/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java
+++ b/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java
@@ -223,6 +223,37 @@ public class ContentUtil {
         return new String(underlying);
     }
 
+    /**
+     * Decodes a sub-sequence of the specified sequence of bytes into a string,
+     * treating it as US-ASCII and falling back to {@link #decode(Charset, ByteSequence, int, int)} 
+     * for the whole sub-sequence as soon as a non US-ASCII character is found.
+     * 
+     * @param byteSequence
+     *            sequence of bytes to decode; may be {@code null}.
+     * @param offset
+     *            offset into the byte sequence.
+     * @param length
+     *            number of bytes.
+     * @param charset
+     *            fallback charset.
+     * @return decoded string, or {@code null} if {@code byteSequence} is {@code null}.
+     */
+    public static String decode(ByteSequence byteSequence, int offset, int length, Charset charset) {
+        if (byteSequence == null) {
+            return null;
+        }
+
+        StringBuilder buf = new StringBuilder(length);
+        for (int i = offset; i < offset + length; i++) {
+            char ch = (char) (byteSequence.byteAt(i) & 0xff); // mask so the byte is read as unsigned 0..255
+            if (!CharsetUtil.isASCII(ch)) {
+                return decode(charset, byteSequence, offset, length); // buf is dropped; re-decode from offset
+            }
+            buf.append(ch);
+        }
+        return buf.toString();
+    }
+
     /**
      * Decodes a sub-sequence of the specified sequence of bytes into a string
      * using the specified charset.
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java b/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java
index 96986c9b..23f3b0a9 100644
--- a/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java
@@ -19,7 +19,11 @@
 
 package org.apache.james.mime4j.field;
 
+import java.nio.charset.StandardCharsets;
+
+import org.apache.james.mime4j.codec.DecodeMonitor;
 import org.apache.james.mime4j.dom.field.UnstructuredField;
+import org.apache.james.mime4j.util.ByteArrayBuffer;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -39,4 +43,14 @@ public class UnstructuredFieldTest {
         Assert.assertEquals("Testing value without a leading ' '", "yada", f.getValue());
     }
 
+    @Test
+    public void testGetBodyUtf8() throws Exception {
+        UnstructuredField f;
+        // Header bytes hold Cyrillic text encoded as UTF-8; getValue() must return the same text.
+        byte[] data = "Subject: Счет для ООО \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" от ООО \"Цифровые системы\"".getBytes(StandardCharsets.UTF_8);
+
+        f = (UnstructuredField) DefaultFieldParser.parse(new ByteArrayBuffer(data, true), DecodeMonitor.SILENT);
+        Assert.assertEquals("Testing UTF8 value 1", "Счет для ООО \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" от ООО \"Цифровые системы\"", f.getValue());
+    }
+
 }
diff --git a/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java b/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java
index 0ed76662..5362ecc6 100644
--- a/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java
@@ -19,7 +19,10 @@
 
 package org.apache.james.mime4j.message;
 
+import java.nio.charset.StandardCharsets;
+
 import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.james.mime4j.codec.DecodeMonitor;
 import org.apache.james.mime4j.dom.Header;
 import org.apache.james.mime4j.field.DefaultFieldParser;
 import org.apache.james.mime4j.message.DefaultMessageWriter;
@@ -55,7 +58,9 @@ public class HeaderImplTest {
     public void testWriteSpecialCharacters() throws Exception {
         String hello = SWISS_GERMAN_HELLO;
         Header header = new HeaderImpl();
-        header.addField(DefaultFieldParser.parse("Hello: " + hello));
+        byte[] utf8bytes = ("Hello: " + hello).getBytes(StandardCharsets.UTF_8);
+        ByteArrayBuffer raw = new ByteArrayBuffer(utf8bytes, true);
+        header.addField(DefaultFieldParser.parse(raw, DecodeMonitor.SILENT));
 
         Field field = header.getField("Hello");
         Assert.assertNotNull(field);
@@ -70,7 +75,7 @@ public class HeaderImplTest {
         byte[] b = outstream.toByteArray();
         ByteArrayBuffer buf = new ByteArrayBuffer(b.length);
         buf.append(b, 0, b.length);
-        String s = ContentUtil.decode(buf);
+        String s = ContentUtil.decode(StandardCharsets.UTF_8, buf);
 
         Assert.assertEquals("Hello: " + SWISS_GERMAN_HELLO + "\r\n\r\n", s);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org