You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2021/01/21 16:36:44 UTC

[commons-io] 01/02: Sort members.

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git

commit e403b84597a4299d1cd2d1b93224eade2530128c
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Thu Jan 21 11:22:50 2021 -0500

    Sort members.
---
 .../apache/commons/io/input/XmlStreamReader.java   | 708 ++++++++++-----------
 1 file changed, 354 insertions(+), 354 deletions(-)

diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index d77d382..ee80736 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -110,24 +110,161 @@ public class XmlStreamReader extends Reader {
         new ByteOrderMark(EBCDIC,   0x4C, 0x6F, 0xA7, 0x94)
     };
 
-    private final Reader reader;
+    private static final Pattern CHARSET_PATTERN = Pattern
+            .compile("charset=[\"']?([.[^; \"']]*)[\"']?");
 
-    private final String encoding;
+    /**
+     * Pattern capturing the encoding of the "xml" processing instruction.
+     */
+    public static final Pattern ENCODING_PATTERN = Pattern.compile(
+            "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))",
+            Pattern.MULTILINE);
 
-    private final String defaultEncoding;
+    private static final String RAW_EX_1 =
+        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";
+
+    private static final String RAW_EX_2 =
+        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM";
+
+    private static final String HTTP_EX_1 =
+        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL";
+
+    private static final String HTTP_EX_2 =
+        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch";
+
+    private static final String HTTP_EX_3 =
+        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME";
 
     /**
-     * Returns the default encoding to use if none is set in HTTP content-type,
-     * XML prolog and the rules based on content-type are not adequate.
-     * <p>
-     * If it is NULL the content-type based rules are used.
+     * Returns charset parameter value, NULL if not present, NULL if
+     * httpContentType is NULL.
      *
-     * @return the default encoding to use.
+     * @param httpContentType the HTTP content type
+     * @return The content type encoding (upcased)
      */
-    public String getDefaultEncoding() {
-        return defaultEncoding;
+    static String getContentTypeEncoding(final String httpContentType) {
+        String encoding = null;
+        if (httpContentType != null) {
+            final int i = httpContentType.indexOf(";");
+            if (i > -1) {
+                final String postMime = httpContentType.substring(i + 1);
+                final Matcher m = CHARSET_PATTERN.matcher(postMime);
+                encoding = m.find() ? m.group(1) : null;
+                encoding = encoding != null ? encoding.toUpperCase(Locale.ROOT) : null;
+            }
+        }
+        return encoding;
+    }
+
+    /**
+     * Returns MIME type or NULL if httpContentType is NULL.
+     *
+     * @param httpContentType the HTTP content type
+     * @return The mime content type
+     */
+    static String getContentTypeMime(final String httpContentType) {
+        String mime = null;
+        if (httpContentType != null) {
+            final int i = httpContentType.indexOf(";");
+            if (i >= 0) {
+                mime = httpContentType.substring(0, i);
+            } else {
+                mime = httpContentType;
+            }
+            mime = mime.trim();
+        }
+        return mime;
+    }
+
+    /**
+     * Returns the encoding declared in the <?xml encoding=...?>, NULL if none.
+     *
+     * @param inputStream InputStream to create the reader from.
+     * @param guessedEnc guessed encoding
+     * @return the encoding declared in the <?xml encoding=...?>
+     * @throws IOException thrown if there is a problem reading the stream.
+     */
+    private static String getXmlProlog(final InputStream inputStream, final String guessedEnc)
+            throws IOException {
+        String encoding = null;
+        if (guessedEnc != null) {
+            final byte[] bytes = new byte[BUFFER_SIZE];
+            inputStream.mark(BUFFER_SIZE);
+            int offset = 0;
+            int max = BUFFER_SIZE;
+            int c = inputStream.read(bytes, offset, max);
+            int firstGT = -1;
+            String xmlProlog = ""; // avoid possible NPE warning (cannot happen; this just silences the warning)
+            while (c != -1 && firstGT == -1 && offset < BUFFER_SIZE) {
+                offset += c;
+                max -= c;
+                c = inputStream.read(bytes, offset, max);
+                xmlProlog = new String(bytes, 0, offset, guessedEnc);
+                firstGT = xmlProlog.indexOf('>');
+            }
+            if (firstGT == -1) {
+                if (c == -1) {
+                    throw new IOException("Unexpected end of XML stream");
+                }
+                throw new IOException(
+                        "XML prolog or ROOT element not found on first "
+                                + offset + " bytes");
+            }
+            final int bytesRead = offset;
+            if (bytesRead > 0) {
+                inputStream.reset();
+                final BufferedReader bReader = new BufferedReader(new StringReader(
+                        xmlProlog.substring(0, firstGT + 1)));
+                final StringBuffer prolog = new StringBuffer();
+                String line;
+                while ((line = bReader.readLine()) != null) {
+                    prolog.append(line);
+                }
+                final Matcher m = ENCODING_PATTERN.matcher(prolog);
+                if (m.find()) {
+                    encoding = m.group(1).toUpperCase(Locale.ROOT);
+                    encoding = encoding.substring(1, encoding.length() - 1);
+                }
+            }
+        }
+        return encoding;
+    }
+
+    /**
+     * Indicates if the MIME type belongs to the APPLICATION XML family.
+     *
+     * @param mime The mime type
+     * @return true if the mime type belongs to the APPLICATION XML family,
+     * otherwise false
+     */
+    static boolean isAppXml(final String mime) {
+        return mime != null &&
+               (mime.equals("application/xml") ||
+                mime.equals("application/xml-dtd") ||
+                mime.equals("application/xml-external-parsed-entity") ||
+               mime.startsWith("application/") && mime.endsWith("+xml"));
+    }
+
+    /**
+     * Indicates if the MIME type belongs to the TEXT XML family.
+     *
+     * @param mime The mime type
+     * @return true if the mime type belongs to the TEXT XML family,
+     * otherwise false
+     */
+    static boolean isTextXml(final String mime) {
+        return mime != null &&
+              (mime.equals("text/xml") ||
+               mime.equals("text/xml-external-parsed-entity") ||
+              mime.startsWith("text/") && mime.endsWith("+xml"));
     }
 
+    private final Reader reader;
+
+    private final String encoding;
+
+    private final String defaultEncoding;
+
     /**
      * Creates a Reader for a File.
      * <p>
@@ -229,62 +366,6 @@ public class XmlStreamReader extends Reader {
     }
 
     /**
-     * Creates a Reader using the InputStream of a URL.
-     * <p>
-     * If the URL is not of type HTTP and there is not 'content-type' header in
-     * the fetched data it uses the same logic used for Files.
-     * <p>
-     * If the URL is a HTTP Url or there is a 'content-type' header in the
-     * fetched data it uses the same logic used for an InputStream with
-     * content-type.
-     * <p>
-     * It does a lenient charset encoding detection, check the constructor with
-     * the lenient parameter for details.
-     *
-     * @param url URL to create a Reader from.
-     * @throws IOException thrown if there is a problem reading the stream of
-     *         the URL.
-     */
-    public XmlStreamReader(final URL url) throws IOException {
-        this(Objects.requireNonNull(url, "url").openConnection(), null);
-    }
-
-    /**
-     * Creates a Reader using the InputStream of a URLConnection.
-     * <p>
-     * If the URLConnection is not of type HttpURLConnection and there is not
-     * 'content-type' header in the fetched data it uses the same logic used for
-     * files.
-     * <p>
-     * If the URLConnection is a HTTP Url or there is a 'content-type' header in
-     * the fetched data it uses the same logic used for an InputStream with
-     * content-type.
-     * <p>
-     * It does a lenient charset encoding detection, check the constructor with
-     * the lenient parameter for details.
-     *
-     * @param conn URLConnection to create a Reader from.
-     * @param defaultEncoding The default encoding
-     * @throws IOException thrown if there is a problem reading the stream of
-     *         the URLConnection.
-     */
-    public XmlStreamReader(final URLConnection conn, final String defaultEncoding) throws IOException {
-        Objects.requireNonNull(conn, "conm");
-        this.defaultEncoding = defaultEncoding;
-        final boolean lenient = true;
-        final String contentType = conn.getContentType();
-        final InputStream inputStream = conn.getInputStream();
-        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
-        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
-        if (conn instanceof HttpURLConnection || contentType != null) {
-            this.encoding = processHttpStream(bom, pis, contentType, lenient);
-        } else {
-            this.encoding = doRawStream(bom, pis, lenient);
-        }
-        this.reader = new InputStreamReader(pis, encoding);
-    }
-
-    /**
      * Creates a Reader using an InputStream and the associated content-type
      * header.
      * <p>
@@ -306,6 +387,7 @@ public class XmlStreamReader extends Reader {
         this(inputStream, httpContentType, true);
     }
 
+
     /**
      * Creates a Reader using an InputStream and the associated content-type
      * header. This constructor is lenient regarding the encoding detection.
@@ -335,19 +417,13 @@ public class XmlStreamReader extends Reader {
      *        the charset encoding.
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
-     * @param defaultEncoding The default encoding
      * @throws IOException thrown if there is a problem reading the file.
      * @throws XmlStreamReaderException thrown if the charset encoding could not
      *         be determined according to the specs.
      */
     public XmlStreamReader(final InputStream inputStream, final String httpContentType,
-            final boolean lenient, final String defaultEncoding) throws IOException {
-        Objects.requireNonNull(inputStream, "inputStream");
-        this.defaultEncoding = defaultEncoding;
-        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
-        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
-        this.encoding = processHttpStream(bom, pis, httpContentType, lenient);
-        this.reader = new InputStreamReader(pis, encoding);
+            final boolean lenient) throws IOException {
+        this(inputStream, httpContentType, lenient, null);
     }
 
     /**
@@ -379,127 +455,155 @@ public class XmlStreamReader extends Reader {
      *        the charset encoding.
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
+     * @param defaultEncoding The default encoding
      * @throws IOException thrown if there is a problem reading the file.
      * @throws XmlStreamReaderException thrown if the charset encoding could not
      *         be determined according to the specs.
      */
     public XmlStreamReader(final InputStream inputStream, final String httpContentType,
-            final boolean lenient) throws IOException {
-        this(inputStream, httpContentType, lenient, null);
+            final boolean lenient, final String defaultEncoding) throws IOException {
+        Objects.requireNonNull(inputStream, "inputStream");
+        this.defaultEncoding = defaultEncoding;
+        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
+        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
+        this.encoding = processHttpStream(bom, pis, httpContentType, lenient);
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
-     * Returns the charset encoding of the XmlStreamReader.
+     * Creates a Reader using the InputStream of a URL.
+     * <p>
+     * If the URL is not of type HTTP and there is not 'content-type' header in
+     * the fetched data it uses the same logic used for Files.
+     * <p>
+     * If the URL is a HTTP Url or there is a 'content-type' header in the
+     * fetched data it uses the same logic used for an InputStream with
+     * content-type.
+     * <p>
+     * It does a lenient charset encoding detection, check the constructor with
+     * the lenient parameter for details.
      *
-     * @return charset encoding.
+     * @param url URL to create a Reader from.
+     * @throws IOException thrown if there is a problem reading the stream of
+     *         the URL.
      */
-    public String getEncoding() {
-        return encoding;
+    public XmlStreamReader(final URL url) throws IOException {
+        this(Objects.requireNonNull(url, "url").openConnection(), null);
     }
 
     /**
-     * Invokes the underlying reader's {@code read(char[], int, int)} method.
-     * @param buf the buffer to read the characters into
-     * @param offset The start offset
-     * @param len The number of bytes to read
-     * @return the number of characters read or -1 if the end of stream
-     * @throws IOException if an I/O error occurs
-     */
-    @Override
-    public int read(final char[] buf, final int offset, final int len) throws IOException {
-        return reader.read(buf, offset, len);
-    }
-
-    /**
-     * Closes the XmlStreamReader stream.
+     * Creates a Reader using the InputStream of a URLConnection.
+     * <p>
+     * If the URLConnection is not of type HttpURLConnection and there is not
+     * 'content-type' header in the fetched data it uses the same logic used for
+     * files.
+     * <p>
+     * If the URLConnection is a HTTP Url or there is a 'content-type' header in
+     * the fetched data it uses the same logic used for an InputStream with
+     * content-type.
+     * <p>
+     * It does a lenient charset encoding detection, check the constructor with
+     * the lenient parameter for details.
      *
-     * @throws IOException thrown if there was a problem closing the stream.
+     * @param conn URLConnection to create a Reader from.
+     * @param defaultEncoding The default encoding
+     * @throws IOException thrown if there is a problem reading the stream of
+     *         the URLConnection.
      */
-    @Override
-    public void close() throws IOException {
-        reader.close();
+    public XmlStreamReader(final URLConnection conn, final String defaultEncoding) throws IOException {
+        Objects.requireNonNull(conn, "conm");
+        this.defaultEncoding = defaultEncoding;
+        final boolean lenient = true;
+        final String contentType = conn.getContentType();
+        final InputStream inputStream = conn.getInputStream();
+        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
+        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
+        if (conn instanceof HttpURLConnection || contentType != null) {
+            this.encoding = processHttpStream(bom, pis, contentType, lenient);
+        } else {
+            this.encoding = doRawStream(bom, pis, lenient);
+        }
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
-     * Process the raw stream.
+     * Calculate the HTTP encoding.
      *
-     * @param bom BOMInputStream to detect byte order marks
-     * @param pis BOMInputStream to guess XML encoding
+     * @param httpContentType The HTTP content type
+     * @param bomEnc BOM encoding
+     * @param xmlGuessEnc XML Guess encoding
+     * @param xmlEnc XML encoding
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
-     * @return the encoding to be used
+     * @return the HTTP encoding
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    private String doRawStream(final BOMInputStream bom, final BOMInputStream pis, final boolean lenient)
-            throws IOException {
-        final String bomEnc      = bom.getBOMCharsetName();
-        final String xmlGuessEnc = pis.getBOMCharsetName();
-        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
-        try {
-            return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
-        } catch (final XmlStreamReaderException ex) {
-            if (lenient) {
-                return doLenientDetection(null, ex);
+    String calculateHttpEncoding(final String httpContentType,
+            final String bomEnc, final String xmlGuessEnc, final String xmlEnc,
+            final boolean lenient) throws IOException {
+
+        // Lenient and has XML encoding
+        if (lenient && xmlEnc != null) {
+            return xmlEnc;
+        }
+
+        // Determine mime/encoding content types from HTTP Content Type
+        final String cTMime = getContentTypeMime(httpContentType);
+        final String cTEnc  = getContentTypeEncoding(httpContentType);
+        final boolean appXml  = isAppXml(cTMime);
+        final boolean textXml = isTextXml(cTMime);
+
+        // Mime type NOT "application/xml" or "text/xml"
+        if (!appXml && !textXml) {
+            final String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+        }
+
+        // No content type encoding
+        if (cTEnc == null) {
+            if (appXml) {
+                return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
             }
-            throw ex;
+            return defaultEncoding == null ? US_ASCII : defaultEncoding;
         }
-    }
 
-    /**
-     * Process a HTTP stream.
-     *
-     * @param bom BOMInputStream to detect byte order marks
-     * @param pis BOMInputStream to guess XML encoding
-     * @param httpContentType The HTTP content type
-     * @param lenient indicates if the charset encoding detection should be
-     *        relaxed.
-     * @return the encoding to be used
-     * @throws IOException thrown if there is a problem reading the stream.
-     */
-    private String processHttpStream(final BOMInputStream bom, final BOMInputStream pis, final String httpContentType,
-        final boolean lenient) throws IOException {
-        final String bomEnc = bom.getBOMCharsetName();
-        final String xmlGuessEnc = pis.getBOMCharsetName();
-        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
-        try {
-            return calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
-        } catch (final XmlStreamReaderException ex) {
-            if (lenient) {
-                return doLenientDetection(httpContentType, ex);
+        // UTF-16BE or UTF-16LE content type encoding
+        if (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE)) {
+            if (bomEnc != null) {
+                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
             }
-            throw ex;
+            return cTEnc;
         }
-    }
 
-    /**
-     * Do lenient detection.
-     *
-     * @param httpContentType content-type header to use for the resolution of
-     *        the charset encoding.
-     * @param ex The thrown exception
-     * @return the encoding
-     * @throws IOException thrown if there is a problem reading the stream.
-     */
-    private String doLenientDetection(String httpContentType,
-            XmlStreamReaderException ex) throws IOException {
-        if (httpContentType != null && httpContentType.startsWith("text/html")) {
-            httpContentType = httpContentType.substring("text/html".length());
-            httpContentType = "text/xml" + httpContentType;
-            try {
-                return calculateHttpEncoding(httpContentType, ex.getBomEncoding(),
-                        ex.getXmlGuessEncoding(), ex.getXmlEncoding(), true);
-            } catch (final XmlStreamReaderException ex2) {
-                ex = ex2;
+        // UTF-16 content type encoding
+        if (cTEnc.equals(UTF_16)) {
+            if (bomEnc != null && bomEnc.startsWith(UTF_16)) {
+                return bomEnc;
             }
+            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
         }
-        String encoding = ex.getXmlEncoding();
-        if (encoding == null) {
-            encoding = ex.getContentTypeEncoding();
+
+        // UTF-32BE or UTF-32LE content type encoding
+        if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
+            if (bomEnc != null) {
+                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            }
+            return cTEnc;
         }
-        if (encoding == null) {
-            encoding = defaultEncoding == null ? UTF_8 : defaultEncoding;
+
+        // UTF-32 content type encoding
+        if (cTEnc.equals(UTF_32)) {
+            if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
+                return bomEnc;
+            }
+            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
         }
-        return encoding;
+
+        return cTEnc;
     }
 
     /**
@@ -570,234 +674,130 @@ public class XmlStreamReader extends Reader {
         throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
     }
 
+    /**
+     * Closes the XmlStreamReader stream.
+     *
+     * @throws IOException thrown if there was a problem closing the stream.
+     */
+    @Override
+    public void close() throws IOException {
+        reader.close();
+    }
 
     /**
-     * Calculate the HTTP encoding.
+     * Do lenient detection.
      *
-     * @param httpContentType The HTTP content type
-     * @param bomEnc BOM encoding
-     * @param xmlGuessEnc XML Guess encoding
-     * @param xmlEnc XML encoding
-     * @param lenient indicates if the charset encoding detection should be
-     *        relaxed.
-     * @return the HTTP encoding
+     * @param httpContentType content-type header to use for the resolution of
+     *        the charset encoding.
+     * @param ex The thrown exception
+     * @return the encoding
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    String calculateHttpEncoding(final String httpContentType,
-            final String bomEnc, final String xmlGuessEnc, final String xmlEnc,
-            final boolean lenient) throws IOException {
-
-        // Lenient and has XML encoding
-        if (lenient && xmlEnc != null) {
-            return xmlEnc;
-        }
-
-        // Determine mime/encoding content types from HTTP Content Type
-        final String cTMime = getContentTypeMime(httpContentType);
-        final String cTEnc  = getContentTypeEncoding(httpContentType);
-        final boolean appXml  = isAppXml(cTMime);
-        final boolean textXml = isTextXml(cTMime);
-
-        // Mime type NOT "application/xml" or "text/xml"
-        if (!appXml && !textXml) {
-            final String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-        }
-
-        // No content type encoding
-        if (cTEnc == null) {
-            if (appXml) {
-                return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
-            }
-            return defaultEncoding == null ? US_ASCII : defaultEncoding;
-        }
-
-        // UTF-16BE or UTF-16LE content type encoding
-        if (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE)) {
-            if (bomEnc != null) {
-                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            }
-            return cTEnc;
-        }
-
-        // UTF-16 content type encoding
-        if (cTEnc.equals(UTF_16)) {
-            if (bomEnc != null && bomEnc.startsWith(UTF_16)) {
-                return bomEnc;
+    private String doLenientDetection(String httpContentType,
+            XmlStreamReaderException ex) throws IOException {
+        if (httpContentType != null && httpContentType.startsWith("text/html")) {
+            httpContentType = httpContentType.substring("text/html".length());
+            httpContentType = "text/xml" + httpContentType;
+            try {
+                return calculateHttpEncoding(httpContentType, ex.getBomEncoding(),
+                        ex.getXmlGuessEncoding(), ex.getXmlEncoding(), true);
+            } catch (final XmlStreamReaderException ex2) {
+                ex = ex2;
             }
-            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
         }
-
-        // UTF-32BE or UTF-132E content type encoding
-        if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
-            if (bomEnc != null) {
-                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            }
-            return cTEnc;
+        String encoding = ex.getXmlEncoding();
+        if (encoding == null) {
+            encoding = ex.getContentTypeEncoding();
         }
-
-        // UTF-32 content type encoding
-        if (cTEnc.equals(UTF_32)) {
-            if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
-                return bomEnc;
-            }
-            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+        if (encoding == null) {
+            encoding = defaultEncoding == null ? UTF_8 : defaultEncoding;
         }
-
-        return cTEnc;
+        return encoding;
     }
 
     /**
-     * Returns MIME type or NULL if httpContentType is NULL.
+     * Process the raw stream.
      *
-     * @param httpContentType the HTTP content type
-     * @return The mime content type
+     * @param bom BOMInputStream to detect byte order marks
+     * @param pis BOMInputStream to guess XML encoding
+     * @param lenient indicates if the charset encoding detection should be
+     *        relaxed.
+     * @return the encoding to be used
+     * @throws IOException thrown if there is a problem reading the stream.
      */
-    static String getContentTypeMime(final String httpContentType) {
-        String mime = null;
-        if (httpContentType != null) {
-            final int i = httpContentType.indexOf(";");
-            if (i >= 0) {
-                mime = httpContentType.substring(0, i);
-            } else {
-                mime = httpContentType;
+    private String doRawStream(final BOMInputStream bom, final BOMInputStream pis, final boolean lenient)
+            throws IOException {
+        final String bomEnc      = bom.getBOMCharsetName();
+        final String xmlGuessEnc = pis.getBOMCharsetName();
+        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
+        try {
+            return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
+        } catch (final XmlStreamReaderException ex) {
+            if (lenient) {
+                return doLenientDetection(null, ex);
             }
-            mime = mime.trim();
+            throw ex;
         }
-        return mime;
     }
 
-    private static final Pattern CHARSET_PATTERN = Pattern
-            .compile("charset=[\"']?([.[^; \"']]*)[\"']?");
-
     /**
-     * Returns charset parameter value, NULL if not present, NULL if
-     * httpContentType is NULL.
+     * Returns the default encoding to use if none is set in HTTP content-type,
+     * XML prolog and the rules based on content-type are not adequate.
+     * <p>
+     * If it is NULL the content-type based rules are used.
      *
-     * @param httpContentType the HTTP content type
-     * @return The content type encoding (upcased)
+     * @return the default encoding to use.
      */
-    static String getContentTypeEncoding(final String httpContentType) {
-        String encoding = null;
-        if (httpContentType != null) {
-            final int i = httpContentType.indexOf(";");
-            if (i > -1) {
-                final String postMime = httpContentType.substring(i + 1);
-                final Matcher m = CHARSET_PATTERN.matcher(postMime);
-                encoding = m.find() ? m.group(1) : null;
-                encoding = encoding != null ? encoding.toUpperCase(Locale.ROOT) : null;
-            }
-        }
-        return encoding;
+    public String getDefaultEncoding() {
+        return defaultEncoding;
     }
 
     /**
-     * Pattern capturing the encoding of the "xml" processing instruction.
-     */
-    public static final Pattern ENCODING_PATTERN = Pattern.compile(
-            "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))",
-            Pattern.MULTILINE);
-
-    /**
-     * Returns the encoding declared in the <?xml encoding=...?>, NULL if none.
+     * Returns the charset encoding of the XmlStreamReader.
      *
-     * @param inputStream InputStream to create the reader from.
-     * @param guessedEnc guessed encoding
-     * @return the encoding declared in the <?xml encoding=...?>
-     * @throws IOException thrown if there is a problem reading the stream.
+     * @return charset encoding.
      */
-    private static String getXmlProlog(final InputStream inputStream, final String guessedEnc)
-            throws IOException {
-        String encoding = null;
-        if (guessedEnc != null) {
-            final byte[] bytes = new byte[BUFFER_SIZE];
-            inputStream.mark(BUFFER_SIZE);
-            int offset = 0;
-            int max = BUFFER_SIZE;
-            int c = inputStream.read(bytes, offset, max);
-            int firstGT = -1;
-            String xmlProlog = ""; // avoid possible NPE warning (cannot happen; this just silences the warning)
-            while (c != -1 && firstGT == -1 && offset < BUFFER_SIZE) {
-                offset += c;
-                max -= c;
-                c = inputStream.read(bytes, offset, max);
-                xmlProlog = new String(bytes, 0, offset, guessedEnc);
-                firstGT = xmlProlog.indexOf('>');
-            }
-            if (firstGT == -1) {
-                if (c == -1) {
-                    throw new IOException("Unexpected end of XML stream");
-                }
-                throw new IOException(
-                        "XML prolog or ROOT element not found on first "
-                                + offset + " bytes");
-            }
-            final int bytesRead = offset;
-            if (bytesRead > 0) {
-                inputStream.reset();
-                final BufferedReader bReader = new BufferedReader(new StringReader(
-                        xmlProlog.substring(0, firstGT + 1)));
-                final StringBuffer prolog = new StringBuffer();
-                String line;
-                while ((line = bReader.readLine()) != null) {
-                    prolog.append(line);
-                }
-                final Matcher m = ENCODING_PATTERN.matcher(prolog);
-                if (m.find()) {
-                    encoding = m.group(1).toUpperCase(Locale.ROOT);
-                    encoding = encoding.substring(1, encoding.length() - 1);
-                }
-            }
-        }
+    public String getEncoding() {
         return encoding;
     }
 
     /**
-     * Indicates if the MIME type belongs to the APPLICATION XML family.
+     * Process a HTTP stream.
      *
-     * @param mime The mime type
-     * @return true if the mime type belongs to the APPLICATION XML family,
-     * otherwise false
+     * @param bom BOMInputStream to detect byte order marks
+     * @param pis BOMInputStream to guess XML encoding
+     * @param httpContentType The HTTP content type
+     * @param lenient indicates if the charset encoding detection should be
+     *        relaxed.
+     * @return the encoding to be used
+     * @throws IOException thrown if there is a problem reading the stream.
      */
-    static boolean isAppXml(final String mime) {
-        return mime != null &&
-               (mime.equals("application/xml") ||
-                mime.equals("application/xml-dtd") ||
-                mime.equals("application/xml-external-parsed-entity") ||
-               mime.startsWith("application/") && mime.endsWith("+xml"));
+    private String processHttpStream(final BOMInputStream bom, final BOMInputStream pis, final String httpContentType,
+        final boolean lenient) throws IOException {
+        final String bomEnc = bom.getBOMCharsetName();
+        final String xmlGuessEnc = pis.getBOMCharsetName();
+        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
+        try {
+            return calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
+        } catch (final XmlStreamReaderException ex) {
+            if (lenient) {
+                return doLenientDetection(httpContentType, ex);
+            }
+            throw ex;
+        }
     }
 
     /**
-     * Indicates if the MIME type belongs to the TEXT XML family.
-     *
-     * @param mime The mime type
-     * @return true if the mime type belongs to the TEXT XML family,
-     * otherwise false
+     * Invokes the underlying reader's {@code read(char[], int, int)} method.
+     * @param buf the buffer to read the characters into
+     * @param offset The start offset
+     * @param len The number of bytes to read
+     * @return the number of characters read or -1 if the end of stream
+     * @throws IOException if an I/O error occurs
      */
-    static boolean isTextXml(final String mime) {
-        return mime != null &&
-              (mime.equals("text/xml") ||
-               mime.equals("text/xml-external-parsed-entity") ||
-              mime.startsWith("text/") && mime.endsWith("+xml"));
+    @Override
+    public int read(final char[] buf, final int offset, final int len) throws IOException {
+        return reader.read(buf, offset, len);
     }
 
-    private static final String RAW_EX_1 =
-        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";
-
-    private static final String RAW_EX_2 =
-        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM";
-
-    private static final String HTTP_EX_1 =
-        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL";
-
-    private static final String HTTP_EX_2 =
-        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch";
-
-    private static final String HTTP_EX_3 =
-        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME";
-
 }