You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commons-dev@ws.apache.org by ve...@apache.org on 2010/09/29 11:14:27 UTC

svn commit: r1002531 - in /webservices/commons/trunk/modules/axiom/modules/axiom-api/src: main/java/org/apache/axiom/util/stax/dialect/ test/java/org/apache/axiom/util/stax/dialect/

Author: veithen
Date: Wed Sep 29 09:14:26 2010
New Revision: 1002531

URL: http://svn.apache.org/viewvc?rev=1002531&view=rev
Log:
Work around broken character encoding autodetection in XLXP2.

Added:
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java   (with props)
Modified:
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java
    webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java?rev=1002531&r1=1002530&r2=1002531&view=diff
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2Dialect.java Wed Sep 29 09:14:26 2010
@@ -63,7 +63,7 @@ class XLXP2Dialect extends AbstractStAXD
     }
 
     public XMLInputFactory normalize(XMLInputFactory factory) {
-        return new NormalizingXMLInputFactoryWrapper(factory, this);
+        return new XLXP2InputFactoryWrapper(factory, this);
     }
 
     public XMLOutputFactory normalize(XMLOutputFactory factory) {

Added: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java?rev=1002531&view=auto
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java (added)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java Wed Sep 29 09:14:26 2010
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.axiom.util.stax.dialect;
+
+import java.io.InputStream;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+class XLXP2InputFactoryWrapper extends NormalizingXMLInputFactoryWrapper { // Input factory wrapper installed by XLXP2Dialect.normalize(XMLInputFactory); pre-detects the encoding of byte-stream input.
+    public XLXP2InputFactoryWrapper(XMLInputFactory parent, AbstractStAXDialect dialect) { // Both arguments are handed unchanged to the superclass constructor.
+        super(parent, dialect);
+    }
+    // Convenience overload: same as the two-argument variant called with a null system ID.
+    public XMLStreamReader createXMLStreamReader(InputStream stream) throws XMLStreamException {
+        return createXMLStreamReader(null, stream);
+    }
+    // Creates a reader for a byte stream; the encoding is supplied explicitly only when it is detected as UTF-16 and systemId is null.
+    public XMLStreamReader createXMLStreamReader(String systemId, InputStream stream)
+            throws XMLStreamException {
+        // Workaround: XLXP2 fails on documents that use UTF-16 without a byte order
+        // mark (BOM), although this type of document is explicitly supported by the
+        // XML specification. The encoding is therefore detected here, before delegating.
+        EncodingDetectionHelper helper = new EncodingDetectionHelper(stream); // NOTE(review): helper is defined elsewhere; presumably it buffers the bytes it inspects - confirm
+        stream = helper.getInputStream(); // From here on, read through the helper's stream instead of the caller's original one.
+        String encoding = helper.detectEncoding(); // NOTE(review): assumed non-null, since it is dereferenced on the next line - confirm
+        if (encoding.startsWith("UTF-16")) { // Prefix match: any encoding name beginning with "UTF-16" takes this branch.
+            if (systemId == null) {
+                return super.createXMLStreamReader(stream, encoding); // Hand the detected encoding to the underlying factory explicitly.
+            } else {
+                // Known limitation: no overload accepts both a systemId and an encoding,
+                // so the detected encoding is dropped and the workaround does not apply here.
+                return super.createXMLStreamReader(systemId, stream);
+            }
+        } else { // Any other detected encoding: delegate without passing an encoding.
+            if (systemId == null) {
+                return super.createXMLStreamReader(stream);
+            } else {
+                return super.createXMLStreamReader(systemId, stream);
+            }
+        }
+    }
+}

Propchange: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/main/java/org/apache/axiom/util/stax/dialect/XLXP2InputFactoryWrapper.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java?rev=1002531&r1=1002530&r2=1002531&view=diff
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/DialectTest.java Wed Sep 29 09:14:26 2010
@@ -47,8 +47,17 @@ public class DialectTest extends TestSui
         addDialectTest(new GetCharacterEncodingSchemeTestCase());
         addDialectTest(new GetEncodingExternalTestCase());
         addDialectTest(new GetEncodingFromDetectionTestCase("UTF-8", "UTF-8"));
-        addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeBig", "UTF-16BE"));
-        addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeLittle", "UTF-16LE"));
+        // The case of UTF-16 with a byte order mark (BOM) is not well defined:
+        // * One may argue that the result should be UTF-16BE or UTF-16LE because
+        //   otherwise the information about the byte order is lost.
+        // * On the other hand, one may argue that the result should be UTF-16
+        //   because UTF-16BE or UTF-16LE may be interpreted as an indication that
+        //   there should be no BOM.
+        // Therefore we accept both results.
+        addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeBig", new String[] { "UTF-16", "UTF-16BE" } ));
+        addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeLittle", new String[] { "UTF-16", "UTF-16LE" }));
+        // Here there is no doubt; if the encoding is UTF-16 without BOM, then the
+        // parser should report the detected byte order.
         addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeBigUnmarked", "UTF-16BE"));
         addDialectTest(new GetEncodingFromDetectionTestCase("UnicodeLittleUnmarked", "UTF-16LE"));
         addDialectTest(new GetEncodingTestCase());

Modified: webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java
URL: http://svn.apache.org/viewvc/webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java?rev=1002531&r1=1002530&r2=1002531&view=diff
==============================================================================
--- webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java (original)
+++ webservices/commons/trunk/modules/axiom/modules/axiom-api/src/test/java/org/apache/axiom/util/stax/dialect/GetEncodingFromDetectionTestCase.java Wed Sep 29 09:14:26 2010
@@ -19,6 +19,9 @@
 package org.apache.axiom.util.stax.dialect;
 
 import java.io.ByteArrayInputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.stream.XMLStreamReader;
@@ -31,18 +34,24 @@ import javax.xml.stream.XMLStreamReader;
  */
 public class GetEncodingFromDetectionTestCase extends DialectTestCase {
     private final String javaEncoding;
-    private final String xmlEncoding;
+    private final Set xmlEncodings;
 
-    public GetEncodingFromDetectionTestCase(String javaEncoding, String xmlEncoding) {
+    public GetEncodingFromDetectionTestCase(String javaEncoding, String[] xmlEncodings) {
         this.javaEncoding = javaEncoding;
-        this.xmlEncoding = xmlEncoding;
+        this.xmlEncodings = new HashSet(Arrays.asList(xmlEncodings));
         setName(getClass().getName() + " [" + javaEncoding + "]");
     }
+    
+    public GetEncodingFromDetectionTestCase(String javaEncoding, String xmlEncoding) {
+        this(javaEncoding, new String[] { xmlEncoding });
+    }
 
     protected void runTest() throws Throwable {
         XMLInputFactory factory = newNormalizedXMLInputFactory();
         XMLStreamReader reader = factory.createXMLStreamReader(new ByteArrayInputStream(
                 "<?xml version=\"1.0\"?><root/>".getBytes(javaEncoding)));
-        assertEquals(xmlEncoding, reader.getEncoding());
+        String actualEncoding = reader.getEncoding();
+        assertTrue("Expected one of " + xmlEncodings + ", but got " + actualEncoding,
+                   xmlEncodings.contains(actualEncoding));
     }
 }