You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@santuario.apache.org by co...@apache.org on 2014/06/18 12:24:26 UTC

svn commit: r1603396 - in /santuario/xml-security-java/branches/1.5.x-fixes/src: main/java/org/apache/xml/security/c14n/implementations/ test/java/org/apache/xml/security/c14n/implementations/

Author: coheigea
Date: Wed Jun 18 10:24:26 2014
New Revision: 1603396

URL: http://svn.apache.org/r1603396
Log:
[SANTUARIO-307] - utf8 encode is broken. Thanks to TH Heung for the patch.


Conflicts:
	src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java
	src/main/java/org/apache/xml/security/stax/impl/transformer/canonicalizer/CanonicalizerBase.java

Modified:
    santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java
    santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java
    santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java

Modified: santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java
URL: http://svn.apache.org/viewvc/santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java?rev=1603396&r1=1603395&r2=1603396&view=diff
==============================================================================
--- santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java (original)
+++ santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/CanonicalizerBase.java Wed Jun 18 10:24:26 2014
@@ -692,7 +692,8 @@ public abstract class CanonicalizerBase 
         final int length = value.length();
         int i = 0;
         while (i < length) {        
-            char c = value.charAt(i++);
+            int c = value.codePointAt(i);
+            i += Character.charCount(c);
 
             switch (c) {
 
@@ -724,7 +725,7 @@ public abstract class CanonicalizerBase 
                 if (c < 0x80) {
                     writer.write(c);
                 } else {
-                    UtfHelpper.writeCharToUtf8(c, writer);
+                    UtfHelpper.writeCodePointToUtf8(c, writer);
                 }
                 continue;
             }
@@ -752,15 +753,16 @@ public abstract class CanonicalizerBase 
         final String target = currentPI.getTarget();
         int length = target.length();
 
-        for (int i = 0; i < length; i++) {         
-            char c = target.charAt(i);
+        for (int i = 0; i < length; ) {
+            int c = target.codePointAt(i);
+            i += Character.charCount(c);
             if (c == 0x0D) {
                 writer.write(XD.clone());
             } else {
                 if (c < 0x80) {
                     writer.write(c);
                 } else {
-                    UtfHelpper.writeCharToUtf8(c, writer);
+                    UtfHelpper.writeCodePointToUtf8(c, writer);
                 }     
             }
         }
@@ -772,12 +774,13 @@ public abstract class CanonicalizerBase 
         if (length > 0) {
             writer.write(' ');
 
-            for (int i = 0; i < length; i++) {            
-                char c = data.charAt(i);
+            for (int i = 0; i < length; ) {
+                int c = data.codePointAt(i);
+                i += Character.charCount(c);
                 if (c == 0x0D) {
                     writer.write(XD.clone());
                 } else {
-                    UtfHelpper.writeCharToUtf8(c, writer);               
+                    UtfHelpper.writeCodePointToUtf8(c, writer);
                 }
             }
         }
@@ -806,15 +809,16 @@ public abstract class CanonicalizerBase 
         final String data = currentComment.getData();
         final int length = data.length();      
 
-        for (int i = 0; i < length; i++) {         
-            char c = data.charAt(i);
+        for (int i = 0; i < length; ) {
+            int c = data.codePointAt(i);
+            i += Character.charCount(c);
             if (c == 0x0D) {
                 writer.write(XD.clone());
             } else {
                 if (c < 0x80) {
                     writer.write(c);
                 } else {
-                    UtfHelpper.writeCharToUtf8(c, writer);
+                    UtfHelpper.writeCodePointToUtf8(c, writer);
                 }    
             }      
         }
@@ -837,8 +841,9 @@ public abstract class CanonicalizerBase 
     ) throws IOException {
         final int length = text.length();
         byte[] toWrite;
-        for (int i = 0; i < length; i++) {
-            char c = text.charAt(i);
+        for (int i = 0; i < length; ) {
+            int c = text.codePointAt(i);
+            i += Character.charCount(c);
 
             switch (c) {
 
@@ -862,7 +867,7 @@ public abstract class CanonicalizerBase 
                 if (c < 0x80) {
                     writer.write(c);
                 } else {
-                    UtfHelpper.writeCharToUtf8(c, writer);
+                    UtfHelpper.writeCodePointToUtf8(c, writer);
                 }
                 continue;
             }

Modified: santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java
URL: http://svn.apache.org/viewvc/santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java?rev=1603396&r1=1603395&r2=1603396&view=diff
==============================================================================
--- santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java (original)
+++ santuario/xml-security-java/branches/1.5.x-fixes/src/main/java/org/apache/xml/security/c14n/implementations/UtfHelpper.java Wed Jun 18 10:24:26 2014
@@ -1,176 +1,265 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.xml.security.c14n.implementations;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Map;
-
-public class UtfHelpper {
-
-    static final void writeByte(
-        final String str,
-        final OutputStream out,
-        Map<String, byte[]> cache
-    ) throws IOException {
-        byte[] result = cache.get(str);	 
-        if (result == null) {
-            result = getStringInUtf8(str);
-            cache.put(str, result);
-        }
-
-        out.write(result);
-    }
-
-    static final void writeCharToUtf8(final char c, final OutputStream out) throws IOException {   	
-        if (c < 0x80) {
-            out.write(c);
-            return;
-        }
-        if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
-            //No Surrogates in sun java
-            out.write(0x3f);
-            return;
-        }
-        int bias;
-        int write;
-        char ch;
-        if (c > 0x07FF) {
-            ch = (char)(c>>>12);      
-            write = 0xE0;
-            if (ch > 0) {
-                write |= (ch & 0x0F);
-            } 
-            out.write(write);
-            write = 0x80;
-            bias = 0x3F;        
-        } else {
-            write = 0xC0;
-            bias = 0x1F;
-        }
-        ch = (char)(c>>>6);
-        if (ch > 0) {
-            write |= (ch & bias);
-        } 
-        out.write(write);
-        out.write(0x80 | ((c) & 0x3F));    
-
-    }
-
-    static final void writeStringToUtf8(
-        final String str,
-        final OutputStream out
-    ) throws IOException{	   
-        final int length = str.length();
-        int i = 0;
-        char c;    
-        while (i < length) {
-            c = str.charAt(i++);        
-            if (c < 0x80)  {
-                out.write(c);
-                continue;
-            }
-            if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {
-                //No Surrogates in sun java
-                out.write(0x3f);
-                continue;
-            }
-            char ch;
-            int bias;
-            int write;
-            if (c > 0x07FF) {
-                ch = (char)(c>>>12);      
-                write = 0xE0;
-                if (ch > 0) {
-                    write |= (ch & 0x0F);
-                } 
-                out.write(write);
-                write = 0x80;
-                bias = 0x3F;        
-            } else {
-                write = 0xC0;
-                bias = 0x1F;
-            }
-            ch = (char)(c>>>6);
-            if (ch > 0) {
-                write |= (ch & bias);
-            } 
-            out.write(write);
-            out.write(0x80 | ((c) & 0x3F));       
-
-        }
-
-    }
-    
-    public static final byte[] getStringInUtf8(final String str) {
-        final int length = str.length();
-        boolean expanded = false;
-        byte[] result = new byte[length];
-        int i = 0;
-        int out = 0;
-        char c;    
-        while (i < length) {
-            c = str.charAt(i++);        
-            if (c < 0x80) {
-                result[out++] = (byte)c;
-                continue;
-            }
-            if ((c >= 0xD800 && c <= 0xDBFF) || (c >= 0xDC00 && c <= 0xDFFF)) {  	
-                //No Surrogates in sun java
-                result[out++] = 0x3f;
-                continue;
-            }
-            if (!expanded) {
-                byte newResult[] = new byte[3*length];
-                System.arraycopy(result, 0, newResult, 0, out);				   	    	
-                result = newResult;
-                expanded = true;
-            } 
-            char ch;
-            int bias;
-            byte write;
-            if (c > 0x07FF) {
-                ch = (char)(c>>>12);      
-                write = (byte)0xE0;
-                if (ch > 0) {
-                    write |= (ch & 0x0F);
-                } 
-                result[out++] = write;
-                write = (byte)0x80;
-                bias = 0x3F;        
-            } else {
-                write = (byte)0xC0;
-                bias = 0x1F;
-            }
-            ch = (char)(c>>>6);
-            if (ch > 0) {
-                write |= (ch & bias);
-            } 
-            result[out++] = write;
-            result[out++] = (byte)(0x80 | ((c) & 0x3F)); 
-        }
-        if (expanded) {
-            byte newResult[] = new byte[out];
-            System.arraycopy(result, 0, newResult, 0, out);
-            result = newResult;
-        }
-        return result;
-    }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.xml.security.c14n.implementations;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.Map;
+
+public final class UtfHelpper {
+
+    /**
+     * Revert to the old behavior (version 2 or before), i.e. each surrogate-pair character becomes
+     * '??' in the output. Set the system property org.apache.xml.security.c14n.oldUtf8=true if you
+     * want to verify signatures generated by version 2 or before over XML documents that contain
+     * supplementary (32 bit) characters.
+     */
+    private static final boolean oldUtf8 =
+        AccessController.doPrivileged(new PrivilegedAction<Boolean>() {
+            public Boolean run() {
+                return Boolean.getBoolean
+                    ("org.apache.xml.security.c14n.oldUtf8");
+            }
+        });
+
+    private UtfHelpper() {
+        // utility class with only static methods; not meant to be instantiated
+    }
+
+    public static void writeByte(
+        final String str,
+        final OutputStream out,
+        Map<String, byte[]> cache
+    ) throws IOException {
+        byte[] result = cache.get(str);
+        if (result == null) {
+            result = getStringInUtf8(str);
+            cache.put(str, result);
+        }
+
+        out.write(result);
+    }
+
+    public static void writeCodePointToUtf8(final int c, final OutputStream out) throws IOException {
+        if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
+            // valid code point: c >= 0x0000 && c <= 0x10FFFF
+            out.write(0x3f);
+            return;
+        }
+        if (oldUtf8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+            // version 2 or before output 2 question mark characters for 32 bit chars
+            out.write(0x3f);
+            out.write(0x3f);
+            return;
+        }
+
+        if (c < 0x80) {
+            // 0x00000000 - 0x0000007F
+            // 0xxxxxxx
+            out.write(c);
+            return;
+        }
+        byte extraByte = 0;
+        if (c < 0x800) {
+            // 0x00000080 - 0x000007FF
+            // 110xxxxx 10xxxxxx
+            extraByte = 1;
+        } else if (c < 0x10000) {
+            // 0x00000800 - 0x0000FFFF
+            // 1110xxxx 10xxxxxx 10xxxxxx
+            extraByte = 2;
+        } else if (c < 0x200000) {
+            // 0x00010000 - 0x001FFFFF
+            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+            extraByte = 3;
+        } else if (c < 0x4000000) {
+            // 0x00200000 - 0x03FFFFFF
+            // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+            // already outside valid Character range, just for completeness
+            extraByte = 4;
+        } else if (c <= 0x7FFFFFFF) {
+            // 0x04000000 - 0x7FFFFFFF
+            // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+            // already outside valid Character range, just for completeness
+            extraByte = 5;
+        } else {
+            // 0x80000000 - 0xFFFFFFFF
+            // case not possible as java has no unsigned int
+            out.write(0x3f);
+            return;
+        }
+        
+        byte write;
+        int shift = 6 * extraByte;
+        write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
+        out.write(write);
+        for (int i = extraByte - 1; i >= 0; i--) {
+            shift -= 6;
+            write = (byte)(0x80 | ((c >>> shift) & 0x3F));
+            out.write(write);
+        }
+    }
+
+    public static void writeStringToUtf8(
+        final String str, final OutputStream out
+    ) throws IOException {
+        final int length = str.length();
+        int i = 0;
+        int c;
+        while (i < length) {
+            c = str.codePointAt(i);
+            i += Character.charCount(c);
+            if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
+                // valid code point: c >= 0x0000 && c <= 0x10FFFF
+                out.write(0x3f);
+                continue;
+            }
+            if (oldUtf8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+                // version 2 or before output 2 question mark characters for 32 bit chars
+                out.write(0x3f);
+                out.write(0x3f);
+                continue;
+            }
+            if (c < 0x80)  {
+                out.write(c);
+                continue;
+            }
+            byte extraByte = 0;
+            if (c < 0x800) {
+                // 0x00000080 - 0x000007FF
+                // 110xxxxx 10xxxxxx
+                extraByte = 1;
+            } else if (c < 0x10000) {
+                // 0x00000800 - 0x0000FFFF
+                // 1110xxxx 10xxxxxx 10xxxxxx
+                extraByte = 2;
+            } else if (c < 0x200000) {
+                // 0x00010000 - 0x001FFFFF
+                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                extraByte = 3;
+            } else if (c < 0x4000000) {
+                // 0x00200000 - 0x03FFFFFF
+                // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                // already outside valid Character range, just for completeness
+                extraByte = 4;
+            } else if (c <= 0x7FFFFFFF) {
+                // 0x04000000 - 0x7FFFFFFF
+                // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                // already outside valid Character range, just for completeness
+                extraByte = 5;
+            } else {
+                // 0x80000000 - 0xFFFFFFFF
+                // case not possible as java has no unsigned int
+                out.write(0x3f);
+                continue;
+            }
+            byte write;
+            int shift = 6 * extraByte;
+            write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
+            out.write(write);
+            for (int j = extraByte - 1; j >= 0; j--) {
+                shift -= 6;
+                write = (byte)(0x80 | ((c >>> shift) & 0x3F));
+                out.write(write);
+            }
+
+        }
+
+    }
+
+    public static byte[] getStringInUtf8(final String str) {
+        final int length = str.length();
+        boolean expanded = false;
+        byte[] result = new byte[length];
+        int i = 0;
+        int out = 0;
+        int c;
+        while (i < length) {
+            c = str.codePointAt(i);
+            i += Character.charCount(c);
+            if (!Character.isValidCodePoint(c) || c >= 0xD800 && c <= 0xDBFF || c >= 0xDC00 && c <= 0xDFFF) {
+                // valid code point: c >= 0x0000 && c <= 0x10FFFF
+                result[out++] = (byte)0x3f;
+                continue;
+            }
+            if (oldUtf8 && c >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
+                // version 2 or before output 2 question mark characters for 32 bit chars
+                result[out++] = (byte)0x3f;
+                result[out++] = (byte)0x3f;
+                continue;
+            }
+            if (c < 0x80) {
+                result[out++] = (byte)c;
+                continue;
+            }
+            if (!expanded) {
+                byte newResult[] = new byte[6*length];
+                System.arraycopy(result, 0, newResult, 0, out);
+                result = newResult;
+                expanded = true;
+            }
+            byte extraByte = 0;
+            if (c < 0x800) {
+                // 0x00000080 - 0x000007FF
+                // 110xxxxx 10xxxxxx
+                extraByte = 1;
+            } else if (c < 0x10000) {
+                // 0x00000800 - 0x0000FFFF
+                // 1110xxxx 10xxxxxx 10xxxxxx
+                extraByte = 2;
+            } else if (c < 0x200000) {
+                // 0x00010000 - 0x001FFFFF
+                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+                extraByte = 3;
+            } else if (c < 0x4000000) {
+                // 0x00200000 - 0x03FFFFFF
+                // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                // already outside valid Character range, just for completeness
+                extraByte = 4;
+            } else if (c <= 0x7FFFFFFF) {
+                // 0x04000000 - 0x7FFFFFFF
+                // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
+                // already outside valid Character range, just for completeness
+                extraByte = 5;
+            } else {
+                // 0x80000000 - 0xFFFFFFFF
+                // case not possible as java has no unsigned int
+                result[out++] = 0x3f;
+                continue;
+            }
+            byte write;
+            int shift = 6 * extraByte;
+            write = (byte)((0xFE << (6 - extraByte)) | (c >>> shift));
+            result[out++] = write;
+            for (int j = extraByte - 1; j >= 0; j--) {
+                shift -= 6;
+                write = (byte)(0x80 | ((c >>> shift) & 0x3F));
+                result[out++] = write;
+            }
+        }
+        if (expanded) {
+            byte newResult[] = new byte[out];
+            System.arraycopy(result, 0, newResult, 0, out);
+            result = newResult;
+        }
+        return result;
+    }
+}

Modified: santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java
URL: http://svn.apache.org/viewvc/santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java?rev=1603396&r1=1603395&r2=1603396&view=diff
==============================================================================
--- santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java (original)
+++ santuario/xml-security-java/branches/1.5.x-fixes/src/test/java/org/apache/xml/security/c14n/implementations/UtfHelperTest.java Wed Jun 18 10:24:26 2014
@@ -23,7 +23,7 @@ import java.io.UnsupportedEncodingExcept
 import java.util.Arrays;
 
 public class UtfHelperTest extends org.junit.Assert {
-    
+
     @org.junit.Test
     public void testBug40156() {
         String s = "\u00e4\u00f6\u00fc";
@@ -37,24 +37,38 @@ public class UtfHelperTest extends org.j
             e.printStackTrace();
         }
     }
-    
+
     @org.junit.Test
     public void testUtf() throws Exception {
-        int chunk = 1 << 16; 
+        // if system property org.apache.xml.security.c14n.oldUtf8=true, can only validate 
+        // 16bit chars against String.getBytes("UTF8");
+        int chunk = (Boolean.getBoolean("org.apache.xml.security.c14n.oldUtf8")) ? (1 << 16)
+            : (Character.MAX_CODE_POINT + 1);
         int j = 0;
         ByteArrayOutputStream charByCharOs = new ByteArrayOutputStream();
         ByteArrayOutputStream strOs = new ByteArrayOutputStream();
 
-        char chs[] = new char[chunk];
+        char chs[] = new char[chunk * 2];
+        int pos = 0;
         for (int i = 0; i < chunk; i++) {
             int ch = (chunk * j) + i;
+            int offset = Character.toChars(ch, chs, pos);
+            pos += offset;
             if (ch == 0xDBFF) {
-                ch = 1;
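+                // 0xDBFF followed by 0xDC00 would form a surrogate pair, so insert a separator character in between (NOTE: Character.SPACE_SEPARATOR is the general-category constant 12, so the char written is U+000C, not U+0020)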
+                offset = Character.toChars(Character.SPACE_SEPARATOR, chs, pos);
+                pos += offset;
             }
-            chs[i] = (char)ch;
-            UtfHelpper.writeCharToUtf8((char)ch, charByCharOs);
         }
-        String str = new String(chs);
+        char newResult[] = new char[pos];
+        System.arraycopy(chs, 0, newResult, 0, pos);
+        for (int i = 0; i < pos; ) {
+            int ch = Character.codePointAt(newResult, i);
+            i += Character.charCount(ch);
+            UtfHelpper.writeCodePointToUtf8(ch, charByCharOs);
+        }
+
+        String str = new String(newResult);
         byte a[] = UtfHelpper.getStringInUtf8(str);
         try {
             // System.out.println("chunk:"+j);
@@ -73,5 +87,5 @@ public class UtfHelperTest extends org.j
             e.printStackTrace();
         }
     }
-    
+
 }