You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by da...@apache.org on 2012/09/29 10:56:25 UTC

svn commit: r1391776 - /commons/proper/imaging/trunk/src/main/java/org/apache/commons/imaging/util/UnicodeUtils.java

Author: damjan
Date: Sat Sep 29 08:56:24 2012
New Revision: 1391776

URL: http://svn.apache.org/viewvc?rev=1391776&view=rev
Log:
Delete a lot of unused code.


Modified:
    commons/proper/imaging/trunk/src/main/java/org/apache/commons/imaging/util/UnicodeUtils.java

Modified: commons/proper/imaging/trunk/src/main/java/org/apache/commons/imaging/util/UnicodeUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/imaging/trunk/src/main/java/org/apache/commons/imaging/util/UnicodeUtils.java?rev=1391776&r1=1391775&r2=1391776&view=diff
==============================================================================
--- commons/proper/imaging/trunk/src/main/java/org/apache/commons/imaging/util/UnicodeUtils.java (original)
+++ commons/proper/imaging/trunk/src/main/java/org/apache/commons/imaging/util/UnicodeUtils.java Sat Sep 29 08:56:24 2012
@@ -28,135 +28,6 @@ public abstract class UnicodeUtils imple
     private UnicodeUtils() {
     }
 
-    public static class UnicodeException extends Exception {
-        private static final long serialVersionUID = 5451109579672755601L;
-
-        public UnicodeException(String message) {
-            super(message);
-        }
-    }
-
-    // A default single-byte charset.
-    public static final int CHAR_ENCODING_CODE_ISO_8859_1 = 0;
-    public static final int CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_WITH_BOM = 1;
-    public static final int CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_WITH_BOM = 2;
-    public static final int CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_NO_BOM = 3;
-    public static final int CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_NO_BOM = 4;
-    public static final int CHAR_ENCODING_CODE_UTF_8 = 5;
-    public static final int CHAR_ENCODING_CODE_AMBIGUOUS = -1;
-
-    // /*
-    // * Guess the character encoding of arbitrary character data in a data
-    // * buffer.
-    // *
-    // * The data may not run to the end of the buffer; it may be terminated.
-    // This
-    // * makes the problem much harder, since the character data may be followed
-    // * by arbitrary data.
-    // */
-    // public static int guessCharacterEncoding(byte bytes[], int index)
-    // {
-    // int length = bytes.length - index;
-    //
-    // if (length < 1)
-    // return CHAR_ENCODING_CODE_AMBIGUOUS;
-    //
-    // if (length >= 2)
-    // {
-    // // look for BOM.
-    //
-    // int c1 = 0xff & bytes[index];
-    // int c2 = 0xff & bytes[index + 1];
-    // if (c1 == 0xFF && c2 == 0xFE)
-    // return CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_WITH_BOM;
-    // else if (c1 == 0xFE && c2 == 0xFF)
-    // return CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_WITH_BOM;
-    // }
-    //
-    // }
-    //
-    // /*
-    // * Guess the character encoding of arbitrary character data in a data
-    // * buffer.
-    // *
-    // * The data fills the entire buffer. If it is terminated, the terminator
-    // * byte(s) will be the last bytes in the buffer.
-    // *
-    // * This makes the problem a bit easier.
-    // */
-    // public static int guessCharacterEncodingSimple(byte bytes[], int index)
-    // throws UnicodeException
-    // {
-    // int length = bytes.length - index;
-    //
-    // if (length < 1)
-    // return CHAR_ENCODING_CODE_AMBIGUOUS;
-    //
-    // if (length >= 2)
-    // {
-    // // identify or eliminate UTF-16 with a BOM.
-    //
-    // int c1 = 0xff & bytes[index];
-    // int c2 = 0xff & bytes[index + 1];
-    // if (c1 == 0xFF && c2 == 0xFE)
-    // return CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_WITH_BOM;
-    // else if (c1 == 0xFE && c2 == 0xFF)
-    // return CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_WITH_BOM;
-    // }
-    //
-    // if (length >= 2)
-    // {
-    // // look for optional double-byte terminator.
-    //
-    // int c1 = 0xff & bytes[bytes.length - 2];
-    // int c2 = 0xff & bytes[bytes.length - 1];
-    // if (c1 == 0 && c2 == 0)
-    // {
-    // // definitely a flavor of UTF-16.
-    // if (length % 2 != 0)
-    // throw new UnicodeException(
-    // "Character data with double-byte terminator has an odd length.");
-    //
-    // boolean mayHaveTerminator = true;
-    // boolean mustHaveTerminator = false;
-    // boolean possibleBigEndian = new UnicodeMetricsUTF16NoBOM(
-    // BYTE_ORDER_BIG_ENDIAN).isValid(bytes, index,
-    // mayHaveTerminator, mustHaveTerminator);
-    // boolean possibleLittleEndian = new UnicodeMetricsUTF16NoBOM(
-    // BYTE_ORDER_LITTLE_ENDIAN).isValid(bytes, index,
-    // mayHaveTerminator, mustHaveTerminator);
-    // if ((!possibleBigEndian) && (!possibleLittleEndian))
-    // throw new UnicodeException(
-    // "Invalid character data, possibly UTF-16.");
-    // if (possibleBigEndian && possibleLittleEndian)
-    // return CHAR_ENCODING_CODE_AMBIGUOUS;
-    // if (possibleBigEndian)
-    // return CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_NO_BOM;
-    // if (possibleLittleEndian)
-    // return CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_NO_BOM;
-    // }
-    // }
-    //
-    // List possibleEncodings = new ArrayList();
-    // if (length % 2 == 0)
-    // {
-    // boolean mayHaveTerminator = true;
-    // boolean mustHaveTerminator = false;
-    // boolean possibleBigEndian = new UnicodeMetricsUTF16NoBOM(
-    // BYTE_ORDER_BIG_ENDIAN).isValid(bytes, index,
-    // mayHaveTerminator, mustHaveTerminator);
-    // boolean possibleLittleEndian = new UnicodeMetricsUTF16NoBOM(
-    // BYTE_ORDER_LITTLE_ENDIAN).isValid(bytes, index,
-    // mayHaveTerminator, mustHaveTerminator);
-    //
-    // if (possibleBigEndian)
-    // return CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_NO_BOM;
-    // if (possibleLittleEndian)
-    // return CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_NO_BOM;
-    // }
-    //
-    // }
-
     public static final boolean isValidISO_8859_1(String s) {
         try {
             String roundtrip = new String(s.getBytes("ISO-8859-1"),
@@ -167,275 +38,4 @@ public abstract class UnicodeUtils imple
             throw new RuntimeException("Error parsing string.", e);
         }
     }
-
-    /*
-     * Return the index of the first utf-16 terminator (ie. two even-aligned
-     * nulls). If not found, return -1.
-     */
-    private static int findFirstDoubleByteTerminator(byte bytes[], int index) {
-        for (int i = index; i < bytes.length - 1; i += 2) {
-            int c1 = 0xff & bytes[index];
-            int c2 = 0xff & bytes[index + 1];
-            if (c1 == 0 && c2 == 0) {
-                return i;
-            }
-        }
-        return -1;
-    }
-
-    public final int findEndWithTerminator(byte bytes[], int index)
-            throws UnicodeException {
-        return findEnd(bytes, index, true);
-    }
-
-    public final int findEndWithoutTerminator(byte bytes[], int index)
-            throws UnicodeException {
-        return findEnd(bytes, index, false);
-    }
-
-    protected abstract int findEnd(byte bytes[], int index,
-            boolean includeTerminator) throws UnicodeException;
-
-    public static UnicodeUtils getInstance(int charEncodingCode)
-            throws UnicodeException {
-        switch (charEncodingCode) {
-        case CHAR_ENCODING_CODE_ISO_8859_1:
-            return new UnicodeMetricsASCII();
-        case CHAR_ENCODING_CODE_UTF_8:
-            // Debug.debug("CHAR_ENCODING_CODE_UTF_8");
-            return new UnicodeMetricsUTF8();
-        case CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_WITH_BOM:
-        case CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_WITH_BOM:
-            // Debug.debug("CHAR_ENCODING_CODE_UTF_16_WITH_BOM");
-            return new UnicodeMetricsUTF16WithBOM();
-        case CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_NO_BOM:
-            return new UnicodeMetricsUTF16NoBOM(BYTE_ORDER_BIG_ENDIAN);
-        case CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_NO_BOM:
-            return new UnicodeMetricsUTF16NoBOM(BYTE_ORDER_LITTLE_ENDIAN);
-        default:
-            throw new UnicodeException("Unknown char encoding code: "
-                    + charEncodingCode);
-        }
-    }
-
-    private static class UnicodeMetricsASCII extends UnicodeUtils {
-        @Override
-        public int findEnd(byte bytes[], int index, boolean includeTerminator)
-                throws UnicodeException {
-            for (int i = index; i < bytes.length; i++) {
-                if (bytes[i] == 0) {
-                    return includeTerminator ? i + 1 : i;
-                }
-            }
-            return bytes.length;
-            // throw new UnicodeException("Terminator not found.");
-        }
-    }
-
-    // private static class UnicodeMetricsISO_8859_1 extends UnicodeUtils
-    // {
-    // public int findEnd(byte bytes[], int index, boolean includeTerminator)
-    // throws UnicodeException
-    // {
-    // for (int i = index; i < bytes.length; i++)
-    // {
-    // if (bytes[i] == 0)
-    // return includeTerminator ? i + 1 : i;
-    // }
-    // return bytes.length;
-    // // throw new UnicodeException("Terminator not found.");
-    // }
-    // }
-
-    private static class UnicodeMetricsUTF8 extends UnicodeUtils {
-
-        @Override
-        public int findEnd(byte bytes[], int index, boolean includeTerminator)
-                throws UnicodeException {
-            // http://en.wikipedia.org/wiki/UTF-8
-
-            while (true) {
-                if (index == bytes.length) {
-                    return bytes.length;
-                }
-                if (index > bytes.length) {
-                    throw new UnicodeException("Terminator not found.");
-                }
-
-                int c1 = 0xff & bytes[index++];
-                if (c1 == 0) {
-                    return includeTerminator ? index : index - 1;
-                } else if (c1 <= 0x7f) {
-                    continue;
-                } else if (c1 <= 0xDF) {
-                    if (index >= bytes.length) {
-                        throw new UnicodeException("Invalid unicode.");
-                    }
-                    int c2 = 0xff & bytes[index++];
-                    if (c2 < 0x80 || c2 > 0xBF) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                } else if (c1 <= 0xEF) {
-                    if (index >= bytes.length - 1) {
-                        throw new UnicodeException("Invalid unicode.");
-                    }
-                    int c2 = 0xff & bytes[index++];
-                    if (c2 < 0x80 || c2 > 0xBF) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                    int c3 = 0xff & bytes[index++];
-                    if (c3 < 0x80 || c3 > 0xBF) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                } else if (c1 <= 0xF4) {
-                    if (index >= bytes.length - 2) {
-                        throw new UnicodeException("Invalid unicode.");
-                    }
-                    int c2 = 0xff & bytes[index++];
-                    if (c2 < 0x80 || c2 > 0xBF) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                    int c3 = 0xff & bytes[index++];
-                    if (c3 < 0x80 || c3 > 0xBF) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                    int c4 = 0xff & bytes[index++];
-                    if (c4 < 0x80 || c4 > 0xBF) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                } else {
-                    throw new UnicodeException("Invalid code point.");
-                }
-            }
-        }
-    }
-
-    private abstract static class UnicodeMetricsUTF16 extends UnicodeUtils {
-        protected int byteOrder = BYTE_ORDER_BIG_ENDIAN;
-
-        public UnicodeMetricsUTF16(int byteOrder) {
-            this.byteOrder = byteOrder;
-        }
-
-        public boolean isValid(byte bytes[], int index,
-                boolean mayHaveTerminator, boolean mustHaveTerminator) {
-            // http://en.wikipedia.org/wiki/UTF-16/UCS-2
-
-            while (true) {
-                if (index == bytes.length) {
-                    // end of buffer, no terminator found.
-                    return !mustHaveTerminator;
-                }
-
-                if (index >= bytes.length - 1) {
-                    // end of odd-length buffer, no terminator found.
-                    return false;
-                }
-
-                int c1 = 0xff & bytes[index++];
-                int c2 = 0xff & bytes[index++];
-                int msb1 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c1 : c2;
-
-                if (c1 == 0 && c2 == 0) {
-                    // terminator found.
-                    return mayHaveTerminator;
-                }
-
-                if (msb1 >= 0xD8) {
-                    // Surrogate pair found.
-
-                    if (msb1 >= 0xDC) {
-                        // invalid first surrogate.
-                        return false;
-                    }
-
-                    if (index >= bytes.length - 1) {
-                        // missing second surrogate.
-                        return false;
-                    }
-
-                    // second word.
-                    int c3 = 0xff & bytes[index++];
-                    int c4 = 0xff & bytes[index++];
-                    int msb2 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c3 : c4;
-                    if (msb2 < 0xDC) {
-                        // invalid second surrogate.
-                        return false;
-                    }
-                }
-            }
-        }
-
-        @Override
-        public int findEnd(byte bytes[], int index, boolean includeTerminator)
-                throws UnicodeException {
-            // http://en.wikipedia.org/wiki/UTF-16/UCS-2
-
-            while (true) {
-                if (index == bytes.length) {
-                    return bytes.length;
-                }
-                if (index > bytes.length - 1) {
-                    throw new UnicodeException("Terminator not found.");
-                }
-
-                int c1 = 0xff & bytes[index++];
-                int c2 = 0xff & bytes[index++];
-                int msb1 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c1 : c2;
-
-                if (c1 == 0 && c2 == 0) {
-                    return includeTerminator ? index : index - 2;
-                } else if (msb1 >= 0xD8) {
-                    if (index > bytes.length - 1) {
-                        throw new UnicodeException("Terminator not found.");
-                    }
-                    // second word.
-                    int c3 = 0xff & bytes[index++];
-                    int c4 = 0xff & bytes[index++];
-                    int msb2 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c3 : c4;
-                    if (msb2 < 0xDC) {
-                        throw new UnicodeException("Invalid code point.");
-                    }
-                }
-            }
-        }
-    }
-
-    private static class UnicodeMetricsUTF16NoBOM extends UnicodeMetricsUTF16 {
-
-        public UnicodeMetricsUTF16NoBOM(final int byteOrder) {
-            super(byteOrder);
-        }
-
-    }
-
-    private static class UnicodeMetricsUTF16WithBOM extends UnicodeMetricsUTF16 {
-
-        public UnicodeMetricsUTF16WithBOM() {
-            super(BYTE_ORDER_BIG_ENDIAN);
-        }
-
-        @Override
-        public int findEnd(byte bytes[], int index, boolean includeTerminator)
-                throws UnicodeException {
-            // http://en.wikipedia.org/wiki/UTF-16/UCS-2
-
-            if (index >= bytes.length - 1) {
-                throw new UnicodeException("Missing BOM.");
-            }
-
-            int c1 = 0xff & bytes[index++];
-            int c2 = 0xff & bytes[index++];
-            if (c1 == 0xFF && c2 == 0xFE) {
-                byteOrder = BYTE_ORDER_LITTLE_ENDIAN;
-            } else if (c1 == 0xFE && c2 == 0xFF) {
-                byteOrder = BYTE_ORDER_BIG_ENDIAN;
-            } else {
-                throw new UnicodeException("Invalid byte order mark.");
-            }
-
-            return super.findEnd(bytes, index, includeTerminator);
-        }
-    }
-
 }