You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2021/01/21 16:36:43 UTC

[commons-io] branch master updated (883b5c8 -> 6d0059e)

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git.


    from 883b5c8  Add @SuppressWarnings and rename private var.
     new e403b84  Sort members.
     new 6d0059e  Add @SuppressWarnings with comments.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../apache/commons/io/input/XmlStreamReader.java   | 712 +++++++++++----------
 1 file changed, 358 insertions(+), 354 deletions(-)


[commons-io] 01/02: Sort members.

Posted by gg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git

commit e403b84597a4299d1cd2d1b93224eade2530128c
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Thu Jan 21 11:22:50 2021 -0500

    Sort members.
---
 .../apache/commons/io/input/XmlStreamReader.java   | 708 ++++++++++-----------
 1 file changed, 354 insertions(+), 354 deletions(-)

diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index d77d382..ee80736 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -110,24 +110,161 @@ public class XmlStreamReader extends Reader {
         new ByteOrderMark(EBCDIC,   0x4C, 0x6F, 0xA7, 0x94)
     };
 
-    private final Reader reader;
+    private static final Pattern CHARSET_PATTERN = Pattern
+            .compile("charset=[\"']?([.[^; \"']]*)[\"']?");
 
-    private final String encoding;
+    /**
+     * Pattern capturing the encoding of the "xml" processing instruction.
+     */
+    public static final Pattern ENCODING_PATTERN = Pattern.compile(
+            "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))",
+            Pattern.MULTILINE);
 
-    private final String defaultEncoding;
+    private static final String RAW_EX_1 =
+        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";
+
+    private static final String RAW_EX_2 =
+        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM";
+
+    private static final String HTTP_EX_1 =
+        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL";
+
+    private static final String HTTP_EX_2 =
+        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch";
+
+    private static final String HTTP_EX_3 =
+        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME";
 
     /**
-     * Returns the default encoding to use if none is set in HTTP content-type,
-     * XML prolog and the rules based on content-type are not adequate.
-     * <p>
-     * If it is NULL the content-type based rules are used.
+     * Returns charset parameter value, NULL if not present, NULL if
+     * httpContentType is NULL.
      *
-     * @return the default encoding to use.
+     * @param httpContentType the HTTP content type
+     * @return The content type encoding (upcased)
      */
-    public String getDefaultEncoding() {
-        return defaultEncoding;
+    static String getContentTypeEncoding(final String httpContentType) {
+        String encoding = null;
+        if (httpContentType != null) {
+            final int i = httpContentType.indexOf(";");
+            if (i > -1) {
+                final String postMime = httpContentType.substring(i + 1);
+                final Matcher m = CHARSET_PATTERN.matcher(postMime);
+                encoding = m.find() ? m.group(1) : null;
+                encoding = encoding != null ? encoding.toUpperCase(Locale.ROOT) : null;
+            }
+        }
+        return encoding;
+    }
+
+    /**
+     * Returns MIME type or NULL if httpContentType is NULL.
+     *
+     * @param httpContentType the HTTP content type
+     * @return The mime content type
+     */
+    static String getContentTypeMime(final String httpContentType) {
+        String mime = null;
+        if (httpContentType != null) {
+            final int i = httpContentType.indexOf(";");
+            if (i >= 0) {
+                mime = httpContentType.substring(0, i);
+            } else {
+                mime = httpContentType;
+            }
+            mime = mime.trim();
+        }
+        return mime;
+    }
+
+    /**
+     * Returns the encoding declared in the <?xml encoding=...?>, NULL if none.
+     *
+     * @param inputStream InputStream to create the reader from.
+     * @param guessedEnc guessed encoding
+     * @return the encoding declared in the <?xml encoding=...?>
+     * @throws IOException thrown if there is a problem reading the stream.
+     */
+    private static String getXmlProlog(final InputStream inputStream, final String guessedEnc)
+            throws IOException {
+        String encoding = null;
+        if (guessedEnc != null) {
+            final byte[] bytes = new byte[BUFFER_SIZE];
+            inputStream.mark(BUFFER_SIZE);
+            int offset = 0;
+            int max = BUFFER_SIZE;
+            int c = inputStream.read(bytes, offset, max);
+            int firstGT = -1;
+            String xmlProlog = ""; // avoid possible NPE warning (cannot happen; this just silences the warning)
+            while (c != -1 && firstGT == -1 && offset < BUFFER_SIZE) {
+                offset += c;
+                max -= c;
+                c = inputStream.read(bytes, offset, max);
+                xmlProlog = new String(bytes, 0, offset, guessedEnc);
+                firstGT = xmlProlog.indexOf('>');
+            }
+            if (firstGT == -1) {
+                if (c == -1) {
+                    throw new IOException("Unexpected end of XML stream");
+                }
+                throw new IOException(
+                        "XML prolog or ROOT element not found on first "
+                                + offset + " bytes");
+            }
+            final int bytesRead = offset;
+            if (bytesRead > 0) {
+                inputStream.reset();
+                final BufferedReader bReader = new BufferedReader(new StringReader(
+                        xmlProlog.substring(0, firstGT + 1)));
+                final StringBuffer prolog = new StringBuffer();
+                String line;
+                while ((line = bReader.readLine()) != null) {
+                    prolog.append(line);
+                }
+                final Matcher m = ENCODING_PATTERN.matcher(prolog);
+                if (m.find()) {
+                    encoding = m.group(1).toUpperCase(Locale.ROOT);
+                    encoding = encoding.substring(1, encoding.length() - 1);
+                }
+            }
+        }
+        return encoding;
+    }
+
+    /**
+     * Indicates if the MIME type belongs to the APPLICATION XML family.
+     *
+     * @param mime The mime type
+     * @return true if the mime type belongs to the APPLICATION XML family,
+     * otherwise false
+     */
+    static boolean isAppXml(final String mime) {
+        return mime != null &&
+               (mime.equals("application/xml") ||
+                mime.equals("application/xml-dtd") ||
+                mime.equals("application/xml-external-parsed-entity") ||
+               mime.startsWith("application/") && mime.endsWith("+xml"));
+    }
+
+    /**
+     * Indicates if the MIME type belongs to the TEXT XML family.
+     *
+     * @param mime The mime type
+     * @return true if the mime type belongs to the TEXT XML family,
+     * otherwise false
+     */
+    static boolean isTextXml(final String mime) {
+        return mime != null &&
+              (mime.equals("text/xml") ||
+               mime.equals("text/xml-external-parsed-entity") ||
+              mime.startsWith("text/") && mime.endsWith("+xml"));
     }
 
+    private final Reader reader;
+
+    private final String encoding;
+
+    private final String defaultEncoding;
+
     /**
      * Creates a Reader for a File.
      * <p>
@@ -229,62 +366,6 @@ public class XmlStreamReader extends Reader {
     }
 
     /**
-     * Creates a Reader using the InputStream of a URL.
-     * <p>
-     * If the URL is not of type HTTP and there is not 'content-type' header in
-     * the fetched data it uses the same logic used for Files.
-     * <p>
-     * If the URL is a HTTP Url or there is a 'content-type' header in the
-     * fetched data it uses the same logic used for an InputStream with
-     * content-type.
-     * <p>
-     * It does a lenient charset encoding detection, check the constructor with
-     * the lenient parameter for details.
-     *
-     * @param url URL to create a Reader from.
-     * @throws IOException thrown if there is a problem reading the stream of
-     *         the URL.
-     */
-    public XmlStreamReader(final URL url) throws IOException {
-        this(Objects.requireNonNull(url, "url").openConnection(), null);
-    }
-
-    /**
-     * Creates a Reader using the InputStream of a URLConnection.
-     * <p>
-     * If the URLConnection is not of type HttpURLConnection and there is not
-     * 'content-type' header in the fetched data it uses the same logic used for
-     * files.
-     * <p>
-     * If the URLConnection is a HTTP Url or there is a 'content-type' header in
-     * the fetched data it uses the same logic used for an InputStream with
-     * content-type.
-     * <p>
-     * It does a lenient charset encoding detection, check the constructor with
-     * the lenient parameter for details.
-     *
-     * @param conn URLConnection to create a Reader from.
-     * @param defaultEncoding The default encoding
-     * @throws IOException thrown if there is a problem reading the stream of
-     *         the URLConnection.
-     */
-    public XmlStreamReader(final URLConnection conn, final String defaultEncoding) throws IOException {
-        Objects.requireNonNull(conn, "conm");
-        this.defaultEncoding = defaultEncoding;
-        final boolean lenient = true;
-        final String contentType = conn.getContentType();
-        final InputStream inputStream = conn.getInputStream();
-        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
-        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
-        if (conn instanceof HttpURLConnection || contentType != null) {
-            this.encoding = processHttpStream(bom, pis, contentType, lenient);
-        } else {
-            this.encoding = doRawStream(bom, pis, lenient);
-        }
-        this.reader = new InputStreamReader(pis, encoding);
-    }
-
-    /**
      * Creates a Reader using an InputStream and the associated content-type
      * header.
      * <p>
@@ -306,6 +387,7 @@ public class XmlStreamReader extends Reader {
         this(inputStream, httpContentType, true);
     }
 
+
     /**
      * Creates a Reader using an InputStream and the associated content-type
      * header. This constructor is lenient regarding the encoding detection.
@@ -335,19 +417,13 @@ public class XmlStreamReader extends Reader {
      *        the charset encoding.
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
-     * @param defaultEncoding The default encoding
      * @throws IOException thrown if there is a problem reading the file.
      * @throws XmlStreamReaderException thrown if the charset encoding could not
      *         be determined according to the specs.
      */
     public XmlStreamReader(final InputStream inputStream, final String httpContentType,
-            final boolean lenient, final String defaultEncoding) throws IOException {
-        Objects.requireNonNull(inputStream, "inputStream");
-        this.defaultEncoding = defaultEncoding;
-        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
-        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
-        this.encoding = processHttpStream(bom, pis, httpContentType, lenient);
-        this.reader = new InputStreamReader(pis, encoding);
+            final boolean lenient) throws IOException {
+        this(inputStream, httpContentType, lenient, null);
     }
 
     /**
@@ -379,127 +455,155 @@ public class XmlStreamReader extends Reader {
      *        the charset encoding.
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
+     * @param defaultEncoding The default encoding
      * @throws IOException thrown if there is a problem reading the file.
      * @throws XmlStreamReaderException thrown if the charset encoding could not
      *         be determined according to the specs.
      */
     public XmlStreamReader(final InputStream inputStream, final String httpContentType,
-            final boolean lenient) throws IOException {
-        this(inputStream, httpContentType, lenient, null);
+            final boolean lenient, final String defaultEncoding) throws IOException {
+        Objects.requireNonNull(inputStream, "inputStream");
+        this.defaultEncoding = defaultEncoding;
+        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
+        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
+        this.encoding = processHttpStream(bom, pis, httpContentType, lenient);
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
-     * Returns the charset encoding of the XmlStreamReader.
+     * Creates a Reader using the InputStream of a URL.
+     * <p>
+     * If the URL is not of type HTTP and there is not 'content-type' header in
+     * the fetched data it uses the same logic used for Files.
+     * <p>
+     * If the URL is a HTTP Url or there is a 'content-type' header in the
+     * fetched data it uses the same logic used for an InputStream with
+     * content-type.
+     * <p>
+     * It does a lenient charset encoding detection, check the constructor with
+     * the lenient parameter for details.
      *
-     * @return charset encoding.
+     * @param url URL to create a Reader from.
+     * @throws IOException thrown if there is a problem reading the stream of
+     *         the URL.
      */
-    public String getEncoding() {
-        return encoding;
+    public XmlStreamReader(final URL url) throws IOException {
+        this(Objects.requireNonNull(url, "url").openConnection(), null);
     }
 
     /**
-     * Invokes the underlying reader's {@code read(char[], int, int)} method.
-     * @param buf the buffer to read the characters into
-     * @param offset The start offset
-     * @param len The number of bytes to read
-     * @return the number of characters read or -1 if the end of stream
-     * @throws IOException if an I/O error occurs
-     */
-    @Override
-    public int read(final char[] buf, final int offset, final int len) throws IOException {
-        return reader.read(buf, offset, len);
-    }
-
-    /**
-     * Closes the XmlStreamReader stream.
+     * Creates a Reader using the InputStream of a URLConnection.
+     * <p>
+     * If the URLConnection is not of type HttpURLConnection and there is not
+     * 'content-type' header in the fetched data it uses the same logic used for
+     * files.
+     * <p>
+     * If the URLConnection is a HTTP Url or there is a 'content-type' header in
+     * the fetched data it uses the same logic used for an InputStream with
+     * content-type.
+     * <p>
+     * It does a lenient charset encoding detection, check the constructor with
+     * the lenient parameter for details.
      *
-     * @throws IOException thrown if there was a problem closing the stream.
+     * @param conn URLConnection to create a Reader from.
+     * @param defaultEncoding The default encoding
+     * @throws IOException thrown if there is a problem reading the stream of
+     *         the URLConnection.
      */
-    @Override
-    public void close() throws IOException {
-        reader.close();
+    public XmlStreamReader(final URLConnection conn, final String defaultEncoding) throws IOException {
+        Objects.requireNonNull(conn, "conm");
+        this.defaultEncoding = defaultEncoding;
+        final boolean lenient = true;
+        final String contentType = conn.getContentType();
+        final InputStream inputStream = conn.getInputStream();
+        final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
+        final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
+        if (conn instanceof HttpURLConnection || contentType != null) {
+            this.encoding = processHttpStream(bom, pis, contentType, lenient);
+        } else {
+            this.encoding = doRawStream(bom, pis, lenient);
+        }
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
-     * Process the raw stream.
+     * Calculate the HTTP encoding.
      *
-     * @param bom BOMInputStream to detect byte order marks
-     * @param pis BOMInputStream to guess XML encoding
+     * @param httpContentType The HTTP content type
+     * @param bomEnc BOM encoding
+     * @param xmlGuessEnc XML Guess encoding
+     * @param xmlEnc XML encoding
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
-     * @return the encoding to be used
+     * @return the HTTP encoding
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    private String doRawStream(final BOMInputStream bom, final BOMInputStream pis, final boolean lenient)
-            throws IOException {
-        final String bomEnc      = bom.getBOMCharsetName();
-        final String xmlGuessEnc = pis.getBOMCharsetName();
-        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
-        try {
-            return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
-        } catch (final XmlStreamReaderException ex) {
-            if (lenient) {
-                return doLenientDetection(null, ex);
+    String calculateHttpEncoding(final String httpContentType,
+            final String bomEnc, final String xmlGuessEnc, final String xmlEnc,
+            final boolean lenient) throws IOException {
+
+        // Lenient and has XML encoding
+        if (lenient && xmlEnc != null) {
+            return xmlEnc;
+        }
+
+        // Determine mime/encoding content types from HTTP Content Type
+        final String cTMime = getContentTypeMime(httpContentType);
+        final String cTEnc  = getContentTypeEncoding(httpContentType);
+        final boolean appXml  = isAppXml(cTMime);
+        final boolean textXml = isTextXml(cTMime);
+
+        // Mime type NOT "application/xml" or "text/xml"
+        if (!appXml && !textXml) {
+            final String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+        }
+
+        // No content type encoding
+        if (cTEnc == null) {
+            if (appXml) {
+                return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
             }
-            throw ex;
+            return defaultEncoding == null ? US_ASCII : defaultEncoding;
         }
-    }
 
-    /**
-     * Process a HTTP stream.
-     *
-     * @param bom BOMInputStream to detect byte order marks
-     * @param pis BOMInputStream to guess XML encoding
-     * @param httpContentType The HTTP content type
-     * @param lenient indicates if the charset encoding detection should be
-     *        relaxed.
-     * @return the encoding to be used
-     * @throws IOException thrown if there is a problem reading the stream.
-     */
-    private String processHttpStream(final BOMInputStream bom, final BOMInputStream pis, final String httpContentType,
-        final boolean lenient) throws IOException {
-        final String bomEnc = bom.getBOMCharsetName();
-        final String xmlGuessEnc = pis.getBOMCharsetName();
-        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
-        try {
-            return calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
-        } catch (final XmlStreamReaderException ex) {
-            if (lenient) {
-                return doLenientDetection(httpContentType, ex);
+        // UTF-16BE or UTF-16LE content type encoding
+        if (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE)) {
+            if (bomEnc != null) {
+                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
             }
-            throw ex;
+            return cTEnc;
         }
-    }
 
-    /**
-     * Do lenient detection.
-     *
-     * @param httpContentType content-type header to use for the resolution of
-     *        the charset encoding.
-     * @param ex The thrown exception
-     * @return the encoding
-     * @throws IOException thrown if there is a problem reading the stream.
-     */
-    private String doLenientDetection(String httpContentType,
-            XmlStreamReaderException ex) throws IOException {
-        if (httpContentType != null && httpContentType.startsWith("text/html")) {
-            httpContentType = httpContentType.substring("text/html".length());
-            httpContentType = "text/xml" + httpContentType;
-            try {
-                return calculateHttpEncoding(httpContentType, ex.getBomEncoding(),
-                        ex.getXmlGuessEncoding(), ex.getXmlEncoding(), true);
-            } catch (final XmlStreamReaderException ex2) {
-                ex = ex2;
+        // UTF-16 content type encoding
+        if (cTEnc.equals(UTF_16)) {
+            if (bomEnc != null && bomEnc.startsWith(UTF_16)) {
+                return bomEnc;
             }
+            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
         }
-        String encoding = ex.getXmlEncoding();
-        if (encoding == null) {
-            encoding = ex.getContentTypeEncoding();
+
+        // UTF-32BE or UTF-132E content type encoding
+        if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
+            if (bomEnc != null) {
+                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            }
+            return cTEnc;
         }
-        if (encoding == null) {
-            encoding = defaultEncoding == null ? UTF_8 : defaultEncoding;
+
+        // UTF-32 content type encoding
+        if (cTEnc.equals(UTF_32)) {
+            if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
+                return bomEnc;
+            }
+            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
         }
-        return encoding;
+
+        return cTEnc;
     }
 
     /**
@@ -570,234 +674,130 @@ public class XmlStreamReader extends Reader {
         throw new XmlStreamReaderException(msg, bomEnc, xmlGuessEnc, xmlEnc);
     }
 
+    /**
+     * Closes the XmlStreamReader stream.
+     *
+     * @throws IOException thrown if there was a problem closing the stream.
+     */
+    @Override
+    public void close() throws IOException {
+        reader.close();
+    }
 
     /**
-     * Calculate the HTTP encoding.
+     * Do lenient detection.
      *
-     * @param httpContentType The HTTP content type
-     * @param bomEnc BOM encoding
-     * @param xmlGuessEnc XML Guess encoding
-     * @param xmlEnc XML encoding
-     * @param lenient indicates if the charset encoding detection should be
-     *        relaxed.
-     * @return the HTTP encoding
+     * @param httpContentType content-type header to use for the resolution of
+     *        the charset encoding.
+     * @param ex The thrown exception
+     * @return the encoding
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    String calculateHttpEncoding(final String httpContentType,
-            final String bomEnc, final String xmlGuessEnc, final String xmlEnc,
-            final boolean lenient) throws IOException {
-
-        // Lenient and has XML encoding
-        if (lenient && xmlEnc != null) {
-            return xmlEnc;
-        }
-
-        // Determine mime/encoding content types from HTTP Content Type
-        final String cTMime = getContentTypeMime(httpContentType);
-        final String cTEnc  = getContentTypeEncoding(httpContentType);
-        final boolean appXml  = isAppXml(cTMime);
-        final boolean textXml = isTextXml(cTMime);
-
-        // Mime type NOT "application/xml" or "text/xml"
-        if (!appXml && !textXml) {
-            final String msg = MessageFormat.format(HTTP_EX_3, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-        }
-
-        // No content type encoding
-        if (cTEnc == null) {
-            if (appXml) {
-                return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
-            }
-            return defaultEncoding == null ? US_ASCII : defaultEncoding;
-        }
-
-        // UTF-16BE or UTF-16LE content type encoding
-        if (cTEnc.equals(UTF_16BE) || cTEnc.equals(UTF_16LE)) {
-            if (bomEnc != null) {
-                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            }
-            return cTEnc;
-        }
-
-        // UTF-16 content type encoding
-        if (cTEnc.equals(UTF_16)) {
-            if (bomEnc != null && bomEnc.startsWith(UTF_16)) {
-                return bomEnc;
+    private String doLenientDetection(String httpContentType,
+            XmlStreamReaderException ex) throws IOException {
+        if (httpContentType != null && httpContentType.startsWith("text/html")) {
+            httpContentType = httpContentType.substring("text/html".length());
+            httpContentType = "text/xml" + httpContentType;
+            try {
+                return calculateHttpEncoding(httpContentType, ex.getBomEncoding(),
+                        ex.getXmlGuessEncoding(), ex.getXmlEncoding(), true);
+            } catch (final XmlStreamReaderException ex2) {
+                ex = ex2;
             }
-            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
         }
-
-        // UTF-32BE or UTF-132E content type encoding
-        if (cTEnc.equals(UTF_32BE) || cTEnc.equals(UTF_32LE)) {
-            if (bomEnc != null) {
-                final String msg = MessageFormat.format(HTTP_EX_1, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-                throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            }
-            return cTEnc;
+        String encoding = ex.getXmlEncoding();
+        if (encoding == null) {
+            encoding = ex.getContentTypeEncoding();
         }
-
-        // UTF-32 content type encoding
-        if (cTEnc.equals(UTF_32)) {
-            if (bomEnc != null && bomEnc.startsWith(UTF_32)) {
-                return bomEnc;
-            }
-            final String msg = MessageFormat.format(HTTP_EX_2, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
-            throw new XmlStreamReaderException(msg, cTMime, cTEnc, bomEnc, xmlGuessEnc, xmlEnc);
+        if (encoding == null) {
+            encoding = defaultEncoding == null ? UTF_8 : defaultEncoding;
         }
-
-        return cTEnc;
+        return encoding;
     }
 
     /**
-     * Returns MIME type or NULL if httpContentType is NULL.
+     * Process the raw stream.
      *
-     * @param httpContentType the HTTP content type
-     * @return The mime content type
+     * @param bom BOMInputStream to detect byte order marks
+     * @param pis BOMInputStream to guess XML encoding
+     * @param lenient indicates if the charset encoding detection should be
+     *        relaxed.
+     * @return the encoding to be used
+     * @throws IOException thrown if there is a problem reading the stream.
      */
-    static String getContentTypeMime(final String httpContentType) {
-        String mime = null;
-        if (httpContentType != null) {
-            final int i = httpContentType.indexOf(";");
-            if (i >= 0) {
-                mime = httpContentType.substring(0, i);
-            } else {
-                mime = httpContentType;
+    private String doRawStream(final BOMInputStream bom, final BOMInputStream pis, final boolean lenient)
+            throws IOException {
+        final String bomEnc      = bom.getBOMCharsetName();
+        final String xmlGuessEnc = pis.getBOMCharsetName();
+        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
+        try {
+            return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
+        } catch (final XmlStreamReaderException ex) {
+            if (lenient) {
+                return doLenientDetection(null, ex);
             }
-            mime = mime.trim();
+            throw ex;
         }
-        return mime;
     }
 
-    private static final Pattern CHARSET_PATTERN = Pattern
-            .compile("charset=[\"']?([.[^; \"']]*)[\"']?");
-
     /**
-     * Returns charset parameter value, NULL if not present, NULL if
-     * httpContentType is NULL.
+     * Returns the default encoding to use if none is set in HTTP content-type,
+     * XML prolog and the rules based on content-type are not adequate.
+     * <p>
+     * If it is NULL the content-type based rules are used.
      *
-     * @param httpContentType the HTTP content type
-     * @return The content type encoding (upcased)
+     * @return the default encoding to use.
      */
-    static String getContentTypeEncoding(final String httpContentType) {
-        String encoding = null;
-        if (httpContentType != null) {
-            final int i = httpContentType.indexOf(";");
-            if (i > -1) {
-                final String postMime = httpContentType.substring(i + 1);
-                final Matcher m = CHARSET_PATTERN.matcher(postMime);
-                encoding = m.find() ? m.group(1) : null;
-                encoding = encoding != null ? encoding.toUpperCase(Locale.ROOT) : null;
-            }
-        }
-        return encoding;
+    public String getDefaultEncoding() {
+        return defaultEncoding;
     }
 
     /**
-     * Pattern capturing the encoding of the "xml" processing instruction.
-     */
-    public static final Pattern ENCODING_PATTERN = Pattern.compile(
-            "<\\?xml.*encoding[\\s]*=[\\s]*((?:\".[^\"]*\")|(?:'.[^']*'))",
-            Pattern.MULTILINE);
-
-    /**
-     * Returns the encoding declared in the <?xml encoding=...?>, NULL if none.
+     * Returns the charset encoding of the XmlStreamReader.
      *
-     * @param inputStream InputStream to create the reader from.
-     * @param guessedEnc guessed encoding
-     * @return the encoding declared in the <?xml encoding=...?>
-     * @throws IOException thrown if there is a problem reading the stream.
+     * @return charset encoding.
      */
-    private static String getXmlProlog(final InputStream inputStream, final String guessedEnc)
-            throws IOException {
-        String encoding = null;
-        if (guessedEnc != null) {
-            final byte[] bytes = new byte[BUFFER_SIZE];
-            inputStream.mark(BUFFER_SIZE);
-            int offset = 0;
-            int max = BUFFER_SIZE;
-            int c = inputStream.read(bytes, offset, max);
-            int firstGT = -1;
-            String xmlProlog = ""; // avoid possible NPE warning (cannot happen; this just silences the warning)
-            while (c != -1 && firstGT == -1 && offset < BUFFER_SIZE) {
-                offset += c;
-                max -= c;
-                c = inputStream.read(bytes, offset, max);
-                xmlProlog = new String(bytes, 0, offset, guessedEnc);
-                firstGT = xmlProlog.indexOf('>');
-            }
-            if (firstGT == -1) {
-                if (c == -1) {
-                    throw new IOException("Unexpected end of XML stream");
-                }
-                throw new IOException(
-                        "XML prolog or ROOT element not found on first "
-                                + offset + " bytes");
-            }
-            final int bytesRead = offset;
-            if (bytesRead > 0) {
-                inputStream.reset();
-                final BufferedReader bReader = new BufferedReader(new StringReader(
-                        xmlProlog.substring(0, firstGT + 1)));
-                final StringBuffer prolog = new StringBuffer();
-                String line;
-                while ((line = bReader.readLine()) != null) {
-                    prolog.append(line);
-                }
-                final Matcher m = ENCODING_PATTERN.matcher(prolog);
-                if (m.find()) {
-                    encoding = m.group(1).toUpperCase(Locale.ROOT);
-                    encoding = encoding.substring(1, encoding.length() - 1);
-                }
-            }
-        }
+    public String getEncoding() {
         return encoding;
     }
 
     /**
-     * Indicates if the MIME type belongs to the APPLICATION XML family.
+     * Process an HTTP stream.
      *
-     * @param mime The mime type
-     * @return true if the mime type belongs to the APPLICATION XML family,
-     * otherwise false
+     * @param bom BOMInputStream to detect byte order marks
+     * @param pis BOMInputStream to guess XML encoding
+     * @param httpContentType The HTTP content type
+     * @param lenient indicates if the charset encoding detection should be
+     *        relaxed.
+     * @return the encoding to be used
+     * @throws IOException thrown if there is a problem reading the stream.
      */
-    static boolean isAppXml(final String mime) {
-        return mime != null &&
-               (mime.equals("application/xml") ||
-                mime.equals("application/xml-dtd") ||
-                mime.equals("application/xml-external-parsed-entity") ||
-               mime.startsWith("application/") && mime.endsWith("+xml"));
+    private String processHttpStream(final BOMInputStream bom, final BOMInputStream pis, final String httpContentType,
+        final boolean lenient) throws IOException {
+        final String bomEnc = bom.getBOMCharsetName();
+        final String xmlGuessEnc = pis.getBOMCharsetName();
+        final String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
+        try {
+            return calculateHttpEncoding(httpContentType, bomEnc, xmlGuessEnc, xmlEnc, lenient);
+        } catch (final XmlStreamReaderException ex) {
+            if (lenient) {
+                return doLenientDetection(httpContentType, ex);
+            }
+            throw ex;
+        }
     }
 
     /**
-     * Indicates if the MIME type belongs to the TEXT XML family.
-     *
-     * @param mime The mime type
-     * @return true if the mime type belongs to the TEXT XML family,
-     * otherwise false
+     * Invokes the underlying reader's {@code read(char[], int, int)} method.
+     * @param buf the buffer to read the characters into
+     * @param offset The start offset
+     * @param len The maximum number of characters to read
+     * @return the number of characters read or -1 if the end of stream
+     * @throws IOException if an I/O error occurs
      */
-    static boolean isTextXml(final String mime) {
-        return mime != null &&
-              (mime.equals("text/xml") ||
-               mime.equals("text/xml-external-parsed-entity") ||
-              mime.startsWith("text/") && mime.endsWith("+xml"));
+    @Override
+    public int read(final char[] buf, final int offset, final int len) throws IOException {
+        return reader.read(buf, offset, len);
     }
 
-    private static final String RAW_EX_1 =
-        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";
-
-    private static final String RAW_EX_2 =
-        "Invalid encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] unknown BOM";
-
-    private static final String HTTP_EX_1 =
-        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], BOM must be NULL";
-
-    private static final String HTTP_EX_2 =
-        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], encoding mismatch";
-
-    private static final String HTTP_EX_3 =
-        "Invalid encoding, CT-MIME [{0}] CT-Enc [{1}] BOM [{2}] XML guess [{3}] XML prolog [{4}], Invalid MIME";
-
 }


[commons-io] 02/02: Add @SuppressWarnings with comments.

Posted by gg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git

commit 6d0059e4a4c7d0cfb8b6d2660b4496e4b22f5975
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Thu Jan 21 11:36:35 2021 -0500

    Add @SuppressWarnings with comments.
---
 src/main/java/org/apache/commons/io/input/XmlStreamReader.java | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index ee80736..c015fde 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -277,6 +277,7 @@ public class XmlStreamReader extends Reader {
      * @param file File to create a Reader from.
      * @throws IOException thrown if there is a problem reading the file.
      */
+    @SuppressWarnings("resource") // FileInputStream is managed through another reader in this instance.
     public XmlStreamReader(final File file) throws IOException {
         this(new FileInputStream(Objects.requireNonNull(file, "file")));
     }
@@ -355,6 +356,7 @@ public class XmlStreamReader extends Reader {
      * @throws XmlStreamReaderException thrown if the charset encoding could not
      *         be determined according to the specs.
      */
+    @SuppressWarnings("resource") // InputStream is managed through an InputStreamReader in this instance.
     public XmlStreamReader(final InputStream inputStream, final boolean lenient, final String defaultEncoding)
             throws IOException {
         Objects.requireNonNull(inputStream, "inputStream");
@@ -460,6 +462,7 @@ public class XmlStreamReader extends Reader {
      * @throws XmlStreamReaderException thrown if the charset encoding could not
      *         be determined according to the specs.
      */
+    @SuppressWarnings("resource") // InputStream is managed through an InputStreamReader in this instance.
     public XmlStreamReader(final InputStream inputStream, final String httpContentType,
             final boolean lenient, final String defaultEncoding) throws IOException {
         Objects.requireNonNull(inputStream, "inputStream");
@@ -516,6 +519,7 @@ public class XmlStreamReader extends Reader {
         final boolean lenient = true;
         final String contentType = conn.getContentType();
         final InputStream inputStream = conn.getInputStream();
+        @SuppressWarnings("resource") // managed by the InputStreamReader tracked by this instance
         final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(inputStream, BUFFER_SIZE), false, BOMS);
         final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
         if (conn instanceof HttpURLConnection || contentType != null) {