You are viewing a plain text version of this content. The canonical link for it is here.
Posted to server-dev@james.apache.org by bt...@apache.org on 2023/11/27 12:23:18 UTC
(james-mime4j) 01/02: RawField.getBody() in UTF-8
This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git
commit 8122e4b901274b2e377f2642b4874951a8f6fea1
Author: Shamil Vakhitov <sh...@bgerp.org>
AuthorDate: Wed Aug 18 20:25:15 2021 +0500
RawField.getBody() in UTF-8
---
.../org/apache/james/mime4j/stream/RawField.java | 3 ++-
.../org/apache/james/mime4j/util/ContentUtil.java | 31 ++++++++++++++++++++++
.../james/mime4j/field/UnstructuredFieldTest.java | 14 ++++++++++
.../james/mime4j/message/HeaderImplTest.java | 9 +++++--
4 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawField.java b/core/src/main/java/org/apache/james/mime4j/stream/RawField.java
index 8bcaa773..e64c6a73 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawField.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawField.java
@@ -19,6 +19,7 @@
package org.apache.james.mime4j.stream;
+import java.nio.charset.StandardCharsets;
import java.util.Locale;
import org.apache.james.mime4j.util.ByteSequence;
@@ -83,7 +84,7 @@ public final class RawField implements Field {
if (len > off + 1 && (CharsetUtil.isWhitespace((char) (raw.byteAt(off) & 0xff)))) {
off++;
}
- return MimeUtil.unfold(ContentUtil.decode(raw, off, len - off));
+ return MimeUtil.unfold(ContentUtil.decode(raw, off, len - off, StandardCharsets.UTF_8));
}
return null;
}
diff --git a/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java b/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java
index d6d2d96e..1e078120 100644
--- a/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java
+++ b/core/src/main/java/org/apache/james/mime4j/util/ContentUtil.java
@@ -223,6 +223,37 @@ public class ContentUtil {
return new String(underlying);
}
+ /**
+ * Decodes a sub-sequence of the specified sequence of bytes into a string,
+ * treating it as US-ASCII and falling back to {@link #decode(Charset, ByteSequence, int, int)}
+ * for the whole sub-sequence as soon as a non US-ASCII byte is found.
+ *
+ * @param byteSequence
+ * sequence of bytes to decode; may be {@code null}.
+ * @param offset
+ * offset into the byte sequence.
+ * @param length
+ * number of bytes.
+ * @param charset
+ * fallback charset.
+ * @return decoded string, or {@code null} if {@code byteSequence} is {@code null}.
+ */
+ public static String decode(ByteSequence byteSequence, int offset, int length, Charset charset) {
+ if (byteSequence == null) {
+ return null;
+ }
+
+ StringBuilder buf = new StringBuilder(length);
+ for (int i = offset; i < offset + length; i++) {
+ char ch = (char) (byteSequence.byteAt(i) & 0xff); // mask so the byte is read as an unsigned value
+ if (!CharsetUtil.isASCII(ch)) {
+ return decode(charset, byteSequence, offset, length); // discard buf; re-decode the whole range with the fallback charset
+ }
+ buf.append(ch);
+ }
+ return buf.toString();
+ }
+
/**
* Decodes a sub-sequence of the specified sequence of bytes into a string
* using the specified charset.
diff --git a/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java b/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java
index 96986c9b..23f3b0a9 100644
--- a/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/field/UnstructuredFieldTest.java
@@ -19,7 +19,11 @@
package org.apache.james.mime4j.field;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.james.mime4j.codec.DecodeMonitor;
import org.apache.james.mime4j.dom.field.UnstructuredField;
+import org.apache.james.mime4j.util.ByteArrayBuffer;
import org.junit.Assert;
import org.junit.Test;
@@ -39,4 +43,14 @@ public class UnstructuredFieldTest {
Assert.assertEquals("Testing value without a leading ' '", "yada", f.getValue());
}
+ @Test
+ public void testGetBodyUtf8() throws Exception { // field body supplied as raw UTF-8 bytes must come back as the same text
+ UnstructuredField f;
+
+ byte[] data = "Subject: Счет для ООО \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" от ООО \"Цифровые системы\"".getBytes(StandardCharsets.UTF_8);
+
+ f = (UnstructuredField) DefaultFieldParser.parse(new ByteArrayBuffer(data, true), DecodeMonitor.SILENT);
+ Assert.assertEquals("Testing UTF8 value 1", "Счет для ООО \"СТАНЦИЯ ВИРТУАЛЬНАЯ\" от ООО \"Цифровые системы\"", f.getValue());
+ }
+
}
diff --git a/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java b/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java
index 0ed76662..5362ecc6 100644
--- a/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java
+++ b/dom/src/test/java/org/apache/james/mime4j/message/HeaderImplTest.java
@@ -19,7 +19,10 @@
package org.apache.james.mime4j.message;
+import java.nio.charset.StandardCharsets;
+
import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.james.mime4j.codec.DecodeMonitor;
import org.apache.james.mime4j.dom.Header;
import org.apache.james.mime4j.field.DefaultFieldParser;
import org.apache.james.mime4j.message.DefaultMessageWriter;
@@ -55,7 +58,9 @@ public class HeaderImplTest {
public void testWriteSpecialCharacters() throws Exception {
String hello = SWISS_GERMAN_HELLO;
Header header = new HeaderImpl();
- header.addField(DefaultFieldParser.parse("Hello: " + hello));
+ byte[] utf8bytes = ("Hello: " + hello).getBytes(StandardCharsets.UTF_8);
+ ByteArrayBuffer raw = new ByteArrayBuffer(utf8bytes, true);
+ header.addField(DefaultFieldParser.parse(raw, DecodeMonitor.SILENT));
Field field = header.getField("Hello");
Assert.assertNotNull(field);
@@ -70,7 +75,7 @@ public class HeaderImplTest {
byte[] b = outstream.toByteArray();
ByteArrayBuffer buf = new ByteArrayBuffer(b.length);
buf.append(b, 0, b.length);
- String s = ContentUtil.decode(buf);
+ String s = ContentUtil.decode(StandardCharsets.UTF_8, buf);
Assert.assertEquals("Hello: " + SWISS_GERMAN_HELLO + "\r\n\r\n", s);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org
For additional commands, e-mail: server-dev-help@james.apache.org