You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commons-dev@ws.apache.org by ve...@apache.org on 2010/09/28 00:03:00 UTC

svn commit: r1001925 - in /webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect: BEAInputFactoryWrapper.java EncodingDetectionHelper.java

Author: veithen
Date: Mon Sep 27 22:03:00 2010
New Revision: 1001925

URL: http://svn.apache.org/viewvc?rev=1001925&view=rev
Log:
Moved the character encoding autodetection code (Appendix F.1 of the XML 1.0 specification) into a separate class so that we can reuse it for other broken StAX implementations.

Added:
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java   (with props)
Modified:
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java?rev=1001925&r1=1001924&r2=1001925&view=diff
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/BEAInputFactoryWrapper.java Mon Sep 27 22:03:00 2010
@@ -19,9 +19,7 @@
 
 package org.apache.axiom.util.stax.dialect;
 
-import java.io.IOException;
 import java.io.InputStream;
-import java.io.PushbackInputStream;
 import java.io.Reader;
 
 import javax.xml.stream.XMLInputFactory;
@@ -50,62 +48,9 @@ class BEAInputFactoryWrapper extends XML
         // information is already available from the stream reader, so that we don't need to
         // reimplement this part.
         // TODO: this needs some more unit testing!
-        byte[] startBytes = new byte[4];
-        try {
-            boolean useMark = stream.markSupported();
-            if (useMark) {
-                stream.mark(4);
-            } else {
-                stream = new PushbackInputStream(stream, 4);
-            }
-            int read = 0;
-            do {
-                int c = stream.read(startBytes, read, 4-read);
-                if (c == -1) {
-                    throw new XMLStreamException("Unexpected end of stream");
-                }
-                read += c;
-            } while (read < 4);
-            if (useMark) {
-                stream.reset();
-            } else {
-                ((PushbackInputStream)stream).unread(startBytes);
-            }
-        } catch (IOException ex) {
-            throw new XMLStreamException("Unable to read start bytes", ex);
-        }
-        int marker = ((startBytes[0] & 0xFF) << 24) + ((startBytes[1] & 0xFF) << 16)
-                + ((startBytes[2] & 0xFF) << 8) + (startBytes[3] & 0xFF);
-        String encoding;
-        switch (marker) {
-            case 0x0000FEFF:
-            case 0xFFFE0000:
-            case 0x0000FFFE:
-            case 0xFEFF0000:
-            case 0x0000003C:
-            case 0x3C000000:
-            case 0x00003C00:
-            case 0x003C0000:
-                encoding = "UCS-4";
-                break;
-            case 0x003C003F:
-                encoding = "UTF-16BE";
-                break;
-            case 0x3C003F00:
-                encoding = "UTF-16LE";
-                break;
-            case 0x3C3F786D:
-                encoding = "UTF-8";
-                break;
-            default:
-                if ((marker & 0xFFFF0000) == 0xFEFF0000) {
-                    encoding = "UTF-16BE";
-                } else if ((marker & 0xFFFF0000) == 0xFFFE0000) {
-                    encoding = "UTF-16LE";
-                } else {
-                    encoding = "UTF-8";
-                }
-        }
+        EncodingDetectionHelper helper = new EncodingDetectionHelper(stream);
+        stream = helper.getInputStream();
+        String encoding = helper.detectEncoding();
         XMLStreamReader reader;
         if (systemId == null) {
             reader = super.createXMLStreamReader(stream);

Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java?rev=1001925&view=auto
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java (added)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java Mon Sep 27 22:03:00 2010
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.axiom.util.stax.dialect;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+import javax.xml.stream.XMLStreamException;
+
+/**
+ * Implements the character encoding autodetection algorithm described in Appendix F.1 of the
+ * XML 1.0 specification (Fifth Edition) by peeking at the first four bytes of an input stream.
+ */
+class EncodingDetectionHelper {
+    private final InputStream stream; // stream to read from: the original or a pushback wrapper
+    private final boolean useMark; // true if peeked bytes are restored with mark()/reset()
+    /** Wraps the stream in a 4-byte PushbackInputStream if it does not support mark/reset. */
+    public EncodingDetectionHelper(InputStream stream) {
+        useMark = stream.markSupported();
+        if (useMark) {
+            this.stream = stream;
+        } else {
+            this.stream = new PushbackInputStream(stream, 4); // buffer sized for the 4 peeked bytes
+        }
+    }
+    /** Returns the stream that was peeked at: the original one, or the pushback wrapper. */
+    public InputStream getInputStream() {
+        return stream;
+    }
+    /** Reads 4 bytes, restores them via reset()/unread(), and returns the encoding name. */
+    public String detectEncoding() throws XMLStreamException {
+        byte[] startBytes = new byte[4];
+        try {
+            if (useMark) {
+                stream.mark(4); // remember the current position so reset() can rewind to it
+            }
+            int read = 0;
+            do { // a single read() may deliver fewer bytes than requested, so loop until 4 are read
+                int c = stream.read(startBytes, read, 4-read);
+                if (c == -1) {
+                    throw new XMLStreamException("Unexpected end of stream");
+                } // NOTE(review): thrown before reset()/unread(); bytes consumed so far are not restored
+                read += c;
+            } while (read < 4);
+            if (useMark) {
+                stream.reset();
+            } else {
+                ((PushbackInputStream)stream).unread(startBytes); // push all 4 bytes back
+            }
+        } catch (IOException ex) {
+            throw new XMLStreamException("Unable to read start bytes", ex);
+        }
+        int marker = ((startBytes[0] & 0xFF) << 24) + ((startBytes[1] & 0xFF) << 16)
+                + ((startBytes[2] & 0xFF) << 8) + (startBytes[3] & 0xFF); // first byte is most significant
+        switch (marker) {
+            case 0x0000FEFF: // bytes 00 00 FE FF
+            case 0xFFFE0000: // bytes FF FE 00 00
+            case 0x0000FFFE: // bytes 00 00 FF FE
+            case 0xFEFF0000: // bytes FE FF 00 00
+            case 0x0000003C: // 0x3C is ASCII '<'; this and the next three cases are
+            case 0x3C000000: // a single '<' byte with three zero bytes, one case per
+            case 0x00003C00: // possible position of the '<' byte
+            case 0x003C0000:
+                return "UCS-4";
+            case 0x003C003F: // bytes 00 3C 00 3F: '<' '?' as 16-bit units, high byte first
+                return "UTF-16BE";
+            case 0x3C003F00: // bytes 3C 00 3F 00: '<' '?' as 16-bit units, low byte first
+                return "UTF-16LE";
+            case 0x3C3F786D: // bytes 3C 3F 78 6D: ASCII "<?xm"
+                return "UTF-8";
+            default: // no exact 4-byte match: only the first two bytes are examined below
+                if ((marker & 0xFFFF0000) == 0xFEFF0000) { // starts with FE FF
+                    return "UTF-16BE";
+                } else if ((marker & 0xFFFF0000) == 0xFFFE0000) { // starts with FF FE
+                    return "UTF-16LE";
+                } else { // NOTE(review): any other start, incl. the EBCDIC pattern of Appendix F.1 — confirm
+                    return "UTF-8";
+                }
+        }
+    }
+}

Propchange: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/EncodingDetectionHelper.java
------------------------------------------------------------------------------
    svn:eol-style = native