You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@santuario.apache.org by co...@apache.org on 2014/06/18 12:24:26 UTC
svn commit: r1603396 - in
/santuario/xml-security-java/branches/1.5.x-fixes/src:
main/java/org/apache/xml/security/c14n/implementations/
test/java/org/apache/xml/security/c14n/implementations/
Author: coheigea
Date: Wed Jun 18 10:24:26 2014
New Revision: 1603396
URL: http://svn.apache.org/r1603396
Log:
[SANTUARIO-307] - utf8 encode is broken. Thanks to TH Heung for the patch.
Conflicts:
src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java
src/main/java/org/apache/xml/security/stax/impl/transformer/canonicalizer/CanonicalizerBase.java
Modified:
santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java
santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java
santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java
Modified: santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java
URL: http://svn.apache.org/viewvc/santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java?rev=1603396&r1=1603395&r2=1603396&view=diff
==============================================================================
--- santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java (original)
+++ santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java Wed Jun 18 10:24:26 2014
@@ -692,7 +692,8 @@ public abstract class CanonicalizerBase
final int length = value.length();
int i = 0;
while (i < length) {
- char c = value.charAt(i++);
+ int c = value.codePointAt(i);
+ i += Character.charCount(c);
switch (c) {
@@ -724,7 +725,7 @@ public abstract class CanonicalizerBase
if (c < 0x80) {
writer.write(c);
} else {
- UtfHelpper.writeCharToUtf8(c, writer);
+ UtfHelpper.writeCodePointToUtf8(c, writer);
}
continue;
}
@@ -752,15 +753,16 @@ public abstract class CanonicalizerBase
final String target = currentPI.getTarget();
int length = target.length();
- for (int i = 0; i < length; i++) {
- char c = target.charAt(i);
+ for (int i = 0; i < length; ) {
+ int c = target.codePointAt(i);
+ i += Character.charCount(c);
if (c == 0x0D) {
writer.write(XD.clone());
} else {
if (c < 0x80) {
writer.write(c);
} else {
- UtfHelpper.writeCharToUtf8(c, writer);
+ UtfHelpper.writeCodePointToUtf8(c, writer);
}
}
}
@@ -772,12 +774,13 @@ public abstract class CanonicalizerBase
if (length > 0) {
writer.write(' ');
- for (int i = 0; i < length; i++) {
- char c = data.charAt(i);
+ for (int i = 0; i < length; ) {
+ int c = data.codePointAt(i);
+ i += Character.charCount(c);
if (c == 0x0D) {
writer.write(XD.clone());
} else {
- UtfHelpper.writeCharToUtf8(c, writer);
+ UtfHelpper.writeCodePointToUtf8(c, writer);
}
}
}
@@ -806,15 +809,16 @@ public abstract class CanonicalizerBase
final String data = currentComment.getData();
final int length = data.length();
- for (int i = 0; i < length; i++) {
- char c = data.charAt(i);
+ for (int i = 0; i < length; ) {
+ int c = data.codePointAt(i);
+ i += Character.charCount(c);
if (c == 0x0D) {
writer.write(XD.clone());
} else {
if (c < 0x80) {
writer.write(c);
} else {
- UtfHelpper.writeCharToUtf8(c, writer);
+ UtfHelpper.writeCodePointToUtf8(c, writer);
}
}
}
@@ -837,8 +841,9 @@ public abstract class CanonicalizerBase
) throws IOException {
final int length = text.length();
byte[] toWrite;
- for (int i = 0; i < length; i++) {
- char c = text.charAt(i);
+ for (int i = 0; i < length; ) {
+ int c = text.codePointAt(i);
+ i += Character.charCount(c);
switch (c) {
@@ -862,7 +867,7 @@ public abstract class CanonicalizerBase
if (c < 0x80) {
writer.write(c);
} else {
- UtfHelpper.writeCharToUtf8(c, writer);
+ UtfHelpper.writeCodePointToUtf8(c, writer);
}
continue;
}
Modified: santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java
URL: http://svn.apache.org/viewvc/santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java?rev=1603396&r1=1603395&r2=1603396&view=diff
==============================================================================
--- santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java (original)
+++ santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java Wed Jun 18 10:24:26 2014
@@ -1,176 +1,265 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.xml.security.c14n.implementations;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Map;
-
-public class UtfHelpper {
-
- static final void writeByte(
- final String str,
- final OutputStream out,
- Map<String, byte[]> cache
- ) throws IOException {
- byte[] result = cache.get(str);
- if (result == null) {
- result = getStringInUtf8(str);
- cache.put(str, result);
- }
-
- out.write(result);
- }
-
- static final void writeCharToUtf8(final char c, final OutputStream out) throws IOException {
- if (c < 0x80) {
- out.write(c);
- return;
- }
- if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
- //No Surrogates in sun java
- out.write(0x3f);
- return;
- }
- int bias;
- int write;
- char ch;
- if (c > 0x07FF) {
- ch = (char)(c>>>12);
- write = 0xE0;
- if (ch > 0) {
- write |= (ch & 0x0F);
- }
- out.write(write);
- write = 0x80;
- bias = 0x3F;
- } else {
- write = 0xC0;
- bias = 0x1F;
- }
- ch = (char)(c>>>6);
- if (ch > 0) {
- write |= (ch & bias);
- }
- out.write(write);
- out.write(0x80 | ((c) & 0x3F));
-
- }
-
- static final void writeStringToUtf8(
- final String str,
- final OutputStream out
- ) throws IOException{
- final int length = str.length();
- int i = 0;
- char c;
- while (i < length) {
- c = str.charAt(i++);
- if (c < 0x80) {
- out.write(c);
- continue;
- }
- if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
- //No Surrogates in sun java
- out.write(0x3f);
- continue;
- }
- char ch;
- int bias;
- int write;
- if (c > 0x07FF) {
- ch = (char)(c>>>12);
- write = 0xE0;
- if (ch > 0) {
- write |= (ch & 0x0F);
- }
- out.write(write);
- write = 0x80;
- bias = 0x3F;
- } else {
- write = 0xC0;
- bias = 0x1F;
- }
- ch = (char)(c>>>6);
- if (ch > 0) {
- write |= (ch & bias);
- }
- out.write(write);
- out.write(0x80 | ((c) & 0x3F));
-
- }
-
- }
-
- public static final byte[] getStringInUtf8(final String str) {
- final int length = str.length();
- boolean expanded = false;
- byte[] result = new byte[length];
- int i = 0;
- int out = 0;
- char c;
- while (i < length) {
- c = str.charAt(i++);
- if (c < 0x80) {
- result[out++] = (byte)c;
- continue;
- }
- if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
- //No Surrogates in sun java
- result[out++] = 0x3f;
- continue;
- }
- if (!expanded) {
- byte newResult[] = new byte[3*length];
- System.arraycopy(result, 0, newResult, 0, out);
- result = newResult;
- expanded = true;
- }
- char ch;
- int bias;
- byte write;
- if (c > 0x07FF) {
- ch = (char)(c>>>12);
- write = (byte)0xE0;
- if (ch > 0) {
- write |= (ch & 0x0F);
- }
- result[out++] = write;
- write = (byte)0x80;
- bias = 0x3F;
- } else {
- write = (byte)0xC0;
- bias = 0x1F;
- }
- ch = (char)(c>>>6);
- if (ch > 0) {
- write |= (ch & bias);
- }
- result[out++] = write;
- result[out++] = (byte)(0x80 | ((c) & 0x3F));
- }
- if (expanded) {
- byte newResult[] = new byte[out];
- System.arraycopy(result, 0, newResult, 0, out);
- result = newResult;
- }
- return result;
- }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.xml.security.c14n.implementations;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.Map;
+
+public final class UtfHelpper {
+
+ /**
+ * Revert to the old behavior (version 2 or before), i.e. surrogate-pair characters become
+ * '??' in the output. Set system property org.apache.xml.security.c14n.oldUtf8=true if you want
+ * to verify signatures generated by version 2 or before that contain 32-bit chars in the
+ * XML document.
+ */
+ private static final boolean oldUtf8 =
+ AccessController.doPrivileged(new PrivilegedAction<Boolean>() {
+ public Boolean run() {
+ return Boolean.getBoolean
+ ("org.apache.xml.security.c14n.oldUtf8");
+ }
+ });
+
+ private UtfHelpper() {
+ // utility class with only static members; not meant to be instantiated
+ }
+
+ public static void writeByte(
+ final String str,
+ final OutputStream out,
+ Map<String, byte[]> cache
+ ) throws IOException {
+ byte[] result = cache.get(str);
+ if (result == null) {
+ result = getStringInUtf8(str);
+ cache.put(str, result);
+ }
+
+ out.write(result);
+ }
+
+ public static void writeCodePointToUtf8(final int c, final OutputStream out) throws IOException {
+ if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
+ // valid code point: c >= 0x0000 && c <= 0x10FFFF
+ out.write(0x3f);
+ return;
+ }
+ if (oldUtf8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+ // version 2 or before output 2 question mark characters for 32 bit chars
+ out.write(0x3f);
+ out.write(0x3f);
+ return;
+ }
+
+ if (c < 0x80) {
+ // 0x00000000 - 0x0000007F
+ // 0xxxxxxx
+ out.write(c);
+ return;
+ }
+ byte extraByte = 0;
+ if (c < 0x800) {
+ // 0x00000080 - 0x000007FF
+ // 110xxxxx 10xxxxxx
+ extraByte = 1;
+ } else if (c < 0x10000) {
+ // 0x00000800 - 0x0000FFFF
+ // 1110xxxx 10xxxxxx 10xxxxxx
+ extraByte = 2;
+ } else if (c < 0x200000) {
+ // 0x00010000 - 0x001FFFFF
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ extraByte = 3;
+ } else if (c < 0x4000000) {
+ // 0x00200000 - 0x03FFFFFF
+ // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ // already outside valid Character range, just for completeness
+ extraByte = 4;
+ } else if (c <= 0x7FFFFFFF) {
+ // 0x04000000 - 0x7FFFFFFF
+ // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ // already outside valid Character range, just for completeness
+ extraByte = 5;
+ } else {
+ // 0x80000000 - 0xFFFFFFFF
+ // case not possible as java has no unsigned int
+ out.write(0x3f);
+ return;
+ }
+
+ byte write;
+ int shift = 6 * extraByte;
+ write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
+ out.write(write);
+ for (int i = extraByte - 1; i >= 0; i--) {
+ shift -= 6;
+ write = (byte)(0x80 | ((c >>> shift) & 0x3F));
+ out.write(write);
+ }
+ }
+
+ public static void writeStringToUtf8(
+ final String str, final OutputStream out
+ ) throws IOException {
+ final int length = str.length();
+ int i = 0;
+ int c;
+ while (i < length) {
+ c = str.codePointAt(i);
+ i += Character.charCount(c);
+ if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
+ // valid code point: c >= 0x0000 && c <= 0x10FFFF
+ out.write(0x3f);
+ continue;
+ }
+ if (oldUtf8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+ // version 2 or before output 2 question mark characters for 32 bit chars
+ out.write(0x3f);
+ out.write(0x3f);
+ continue;
+ }
+ if (c < 0x80) {
+ out.write(c);
+ continue;
+ }
+ byte extraByte = 0;
+ if (c < 0x800) {
+ // 0x00000080 - 0x000007FF
+ // 110xxxxx 10xxxxxx
+ extraByte = 1;
+ } else if (c < 0x10000) {
+ // 0x00000800 - 0x0000FFFF
+ // 1110xxxx 10xxxxxx 10xxxxxx
+ extraByte = 2;
+ } else if (c < 0x200000) {
+ // 0x00010000 - 0x001FFFFF
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ extraByte = 3;
+ } else if (c < 0x4000000) {
+ // 0x00200000 - 0x03FFFFFF
+ // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ // already outside valid Character range, just for completeness
+ extraByte = 4;
+ } else if (c <= 0x7FFFFFFF) {
+ // 0x04000000 - 0x7FFFFFFF
+ // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ // already outside valid Character range, just for completeness
+ extraByte = 5;
+ } else {
+ // 0x80000000 - 0xFFFFFFFF
+ // case not possible as java has no unsigned int
+ out.write(0x3f);
+ continue;
+ }
+ byte write;
+ int shift = 6 * extraByte;
+ write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
+ out.write(write);
+ for (int j = extraByte - 1; j >= 0; j--) {
+ shift -= 6;
+ write = (byte)(0x80 | ((c >>> shift) & 0x3F));
+ out.write(write);
+ }
+
+ }
+
+ }
+
+ public static byte[] getStringInUtf8(final String str) {
+ final int length = str.length();
+ boolean expanded = false;
+ byte[] result = new byte[length];
+ int i = 0;
+ int out = 0;
+ int c;
+ while (i < length) {
+ c = str.codePointAt(i);
+ i += Character.charCount(c);
+ if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
+ // valid code point: c >= 0x0000 && c <= 0x10FFFF
+ result[out++] = (byte)0x3f;
+ continue;
+ }
+ if (oldUtf8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+ // version 2 or before output 2 question mark characters for 32 bit chars
+ result[out++] = (byte)0x3f;
+ result[out++] = (byte)0x3f;
+ continue;
+ }
+ if (c < 0x80) {
+ result[out++] = (byte)c;
+ continue;
+ }
+ if (!expanded) {
+ byte newResult[] = new byte[6*length];
+ System.arraycopy(result, 0, newResult, 0, out);
+ result = newResult;
+ expanded = true;
+ }
+ byte extraByte = 0;
+ if (c < 0x800) {
+ // 0x00000080 - 0x000007FF
+ // 110xxxxx 10xxxxxx
+ extraByte = 1;
+ } else if (c < 0x10000) {
+ // 0x00000800 - 0x0000FFFF
+ // 1110xxxx 10xxxxxx 10xxxxxx
+ extraByte = 2;
+ } else if (c < 0x200000) {
+ // 0x00010000 - 0x001FFFFF
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ extraByte = 3;
+ } else if (c < 0x4000000) {
+ // 0x00200000 - 0x03FFFFFF
+ // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ // already outside valid Character range, just for completeness
+ extraByte = 4;
+ } else if (c <= 0x7FFFFFFF) {
+ // 0x04000000 - 0x7FFFFFFF
+ // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+ // already outside valid Character range, just for completeness
+ extraByte = 5;
+ } else {
+ // 0x80000000 - 0xFFFFFFFF
+ // case not possible as java has no unsigned int
+ result[out++] = 0x3f;
+ continue;
+ }
+ byte write;
+ int shift = 6 * extraByte;
+ write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
+ result[out++] = write;
+ for (int j = extraByte - 1; j >= 0; j--) {
+ shift -= 6;
+ write = (byte)(0x80 | ((c >>> shift) & 0x3F));
+ result[out++] = write;
+ }
+ }
+ if (expanded) {
+ byte newResult[] = new byte[out];
+ System.arraycopy(result, 0, newResult, 0, out);
+ result = newResult;
+ }
+ return result;
+ }
+}
Modified: santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java
URL: http://svn.apache.org/viewvc/santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java?rev=1603396&r1=1603395&r2=1603396&view=diff
==============================================================================
--- santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java (original)
+++ santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java Wed Jun 18 10:24:26 2014
@@ -23,7 +23,7 @@ import java.io.UnsupportedEncodingExcept
import java.util.Arrays;
public class UtfHelperTest extends org.junit.Assert {
-
+
@org.junit.Test
public void testBug40156() {
String s = "\u00e4\u00f6\u00fc";
@@ -37,24 +37,38 @@ public class UtfHelperTest extends org.j
e.printStackTrace();
}
}
-
+
@org.junit.Test
public void testUtf() throws Exception {
- int chunk = 1 << 16;
+ // if system property org.apache.xml.security.c14n.oldUtf8=true, can only validate
+ // 16bit chars against String.getBytes("UTF8");
+ int chunk = (Boolean.getBoolean("org.apache.xml.security.c14n.oldUtf8")) ? (1 << 16)
+ : (Character.MAX_CODE_POINT + 1);
int j = 0;
ByteArrayOutputStream charByCharOs = new ByteArrayOutputStream();
ByteArrayOutputStream strOs = new ByteArrayOutputStream();
- char chs[] = new char[chunk];
+ char chs[] = new char[chunk * 2];
+ int pos = 0;
for (int i = 0; i < chunk; i++) {
int ch = (chunk * j) + i;
+ int offset = Character.toChars(ch, chs, pos);
+ pos += offset;
if (ch == 0xDBFF) {
- ch = 1;
+ // 0xDBFF followed by the next character 0xDC00 would form a surrogate pair, so insert a separator character in between
+ offset = Character.toChars(Character.SPACE_SEPARATOR, chs, pos);
+ pos += offset;
}
- chs[i] = (char)ch;
- UtfHelpper.writeCharToUtf8((char)ch, charByCharOs);
}
- String str = new String(chs);
+ char newResult[] = new char[pos];
+ System.arraycopy(chs, 0, newResult, 0, pos);
+ for (int i = 0; i < pos; ) {
+ int ch = Character.codePointAt(newResult, i);
+ i += Character.charCount(ch);
+ UtfHelpper.writeCodePointToUtf8(ch, charByCharOs);
+ }
+
+ String str = new String(newResult);
byte a[] = UtfHelpper.getStringInUtf8(str);
try {
// System.out.println("chunk:"+j);
@@ -73,5 +87,5 @@ public class UtfHelperTest extends org.j
e.printStackTrace();
}
}
-
+
}