You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2021/05/29 15:25:04 UTC

svn commit: r1890311 [1/2] - in /xmlbeans: site/src/documentation/content/xdocs/ trunk/src/main/java/org/apache/xmlbeans/impl/schema/ trunk/src/main/java/org/apache/xmlbeans/impl/util/ trunk/src/test/java/compile/scomp/detailed/ trunk/src/test/resource...

Author: kiwiwings
Date: Sat May 29 15:25:04 2021
New Revision: 1890311

URL: http://svn.apache.org/viewvc?rev=1890311&view=rev
Log:
XMLBEANS-556 - Support enumerations with more than 64k entries

Added:
    xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeEnum.xsd
      - copied, changed from r1890295, xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeAnnotation.xsd
Modified:
    xmlbeans/site/src/documentation/content/xdocs/status.xml
    xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/schema/SchemaTypeSystemImpl.java
    xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataInputStream.java
    xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataOutputStream.java
    xmlbeans/trunk/src/test/java/compile/scomp/detailed/LargeAnnotation.java
    xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeAnnotation.xsd

Modified: xmlbeans/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/xmlbeans/site/src/documentation/content/xdocs/status.xml?rev=1890311&r1=1890310&r2=1890311&view=diff
==============================================================================
--- xmlbeans/site/src/documentation/content/xdocs/status.xml (original)
+++ xmlbeans/site/src/documentation/content/xdocs/status.xml Sat May 29 15:25:04 2021
@@ -53,6 +53,7 @@
             <action dev="PD" type="update" context="code" fixes-bug="XMLBEANS-214">Support the 2009 version of xml.xsd</action>
             <action dev="PD" type="update" context="code" fixes-bug="XMLBEANS-563">maven plugin does not have feature parity with codehaus maven plugin or allow all configuration options</action>
             <action dev="PD" type="update" context="code" fixes-bug="XMLBEANS-235">Support annotations &gt; 64kb</action>
+            <action dev="PD" type="update" context="code" fixes-bug="XMLBEANS-556">Support enumerations with more than 64k entries</action>
         </actions>
     </release>
 

Modified: xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/schema/SchemaTypeSystemImpl.java
URL: http://svn.apache.org/viewvc/xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/schema/SchemaTypeSystemImpl.java?rev=1890311&r1=1890310&r2=1890311&view=diff
==============================================================================
--- xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/schema/SchemaTypeSystemImpl.java (original)
+++ xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/schema/SchemaTypeSystemImpl.java Sat May 29 15:25:04 2021
@@ -636,19 +636,13 @@ public class SchemaTypeSystemImpl extend
         }
 
         String stringForCode(int code) {
-            if (code == 0) {
-                return null;
-            }
-            return intsToStrings.get(code);
+            return code == 0 ? null : intsToStrings.get(code);
         }
 
         void writeTo(LongUTFDataOutputStream output) {
-            if (intsToStrings.size() >= MAX_UNSIGNED_SHORT) {
-                throw new SchemaTypeLoaderException("Too many strings (" + intsToStrings.size() + ")", _name, _handle, SchemaTypeLoaderException.INT_TOO_LARGE);
-            }
-
             try {
-                output.writeShort(intsToStrings.size());
+                int cnt = intsToStrings.size();
+                output.writeShortOrInt(cnt);
                 boolean isNext = false;
                 for (String str : intsToStrings) {
                     if (isNext) {
@@ -667,7 +661,7 @@ public class SchemaTypeSystemImpl extend
             }
 
             try {
-                int size = input.readUnsignedShort();
+                int size = input.readUnsignedShortOrInt();
                 for (int i = 1; i < size; i++) {
                     String str = input.readLongUTF().intern();
                     int code = codeForString(str);
@@ -1386,6 +1380,24 @@ public class SchemaTypeSystemImpl extend
             }
         }
 
+        int readUnsignedShortOrInt() {
+            try {
+                return _input.readUnsignedShortOrInt();
+            } catch (IOException e) {
+                throw new SchemaTypeLoaderException(e.getMessage(), _name, _handle, SchemaTypeLoaderException.IO_EXCEPTION, e);
+            }
+        }
+
+        void writeShortOrInt(int s) {
+            if (_output != null) {
+                try {
+                    _output.writeShortOrInt(s);
+                } catch (IOException e) {
+                    throw new SchemaTypeLoaderException(e.getMessage(), _name, _handle, SchemaTypeLoaderException.IO_EXCEPTION, e);
+                }
+            }
+        }
+
         int readInt() {
             try {
                 return _input.readInt();
@@ -1405,12 +1417,13 @@ public class SchemaTypeSystemImpl extend
         }
 
         String readString() {
-            return _stringPool.stringForCode(readShort());
+            int code = readUnsignedShortOrInt();
+            return _stringPool.stringForCode(code);
         }
 
         void writeString(String str) {
             int code = _stringPool.codeForString(str);
-            writeShort(code);
+            writeShortOrInt(code);
         }
 
         QName readQName() {
@@ -2118,7 +2131,7 @@ public class SchemaTypeSystemImpl extend
 
                     impl.setBaseEnumTypeRef(readTypeRef());
                     if (isStringEnum) {
-                        int seCount = readShort();
+                        int seCount = readUnsignedShortOrInt();
                         SchemaStringEnumEntry[] entries = new SchemaStringEnumEntry[seCount];
                         for (int i = 0; i < seCount; i++) {
                             entries[i] = new SchemaStringEnumEntryImpl(readString(), readShort(), readString());
@@ -2342,7 +2355,7 @@ public class SchemaTypeSystemImpl extend
                 if (enumValues == null) {
                     writeShort(0);
                 } else {
-                    writeShort(enumValues.length);
+                    writeShortOrInt(enumValues.length);
                     for (XmlAnySimpleType enumValue : enumValues) {
                         writeXmlValueObject(enumValue);
                     }

Modified: xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataInputStream.java
URL: http://svn.apache.org/viewvc/xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataInputStream.java?rev=1890311&r1=1890310&r2=1890311&view=diff
==============================================================================
--- xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataInputStream.java (original)
+++ xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataInputStream.java Sat May 29 15:25:04 2021
@@ -15,10 +15,12 @@
 
 package org.apache.xmlbeans.impl.util;
 
-import java.io.*;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UTFDataFormatException;
 
-import static org.apache.xmlbeans.impl.util.LongUTFDataOutputStream.DATA_OUTPUT_CHUNKS;
-import static org.apache.xmlbeans.impl.util.LongUTFDataOutputStream.LONG_UTF_MAGIC;
+import static org.apache.xmlbeans.impl.util.LongUTFDataOutputStream.MAX_UNSIGNED_SHORT;
 
 /**
  * This class works around the size limitation of UTF strings (&lt; 64kb) of DataInputStream
@@ -26,31 +28,27 @@ import static org.apache.xmlbeans.impl.u
  */
 public class LongUTFDataInputStream extends DataInputStream {
     public LongUTFDataInputStream(InputStream in) {
-        super(wrap(in));
-    }
-
-    private static InputStream wrap(InputStream is) {
-        return is.markSupported() ? is : new BufferedInputStream(is);
+        super(in);
     }
 
     private interface IOCall {
         byte onebyte(int[] readBuf, int[] fillBuf, int[] readLen) throws IOException;
     }
 
-    public String readLongUTF() throws IOException {
-        mark(6);
-        int utfLen1 = readShort() & 0x0000FFFF;
-        if (utfLen1 < DATA_OUTPUT_CHUNKS) {
-            reset();
-            return readUTF();
-        }
-        int magic = readInt();
-        if (magic != LONG_UTF_MAGIC) {
-            reset();
-            return readUTF();
+    public int readUnsignedShortOrInt() throws IOException {
+        return readUnsignedShortOrInt(this);
+    }
+
+    public static int readUnsignedShortOrInt(DataInputStream dis) throws IOException {
+        int value = dis.readUnsignedShort();
+        if (value == MAX_UNSIGNED_SHORT) {
+            value = dis.readInt();
         }
+        return value;
+    }
 
-        final int utfLen = readInt();
+    public String readLongUTF() throws IOException {
+        final int utfLen = readUnsignedShortOrInt();
         StringBuilder sb = new StringBuilder(utfLen/2);
         final byte[] bytearr = new byte[4096];
 

Modified: xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataOutputStream.java
URL: http://svn.apache.org/viewvc/xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataOutputStream.java?rev=1890311&r1=1890310&r2=1890311&view=diff
==============================================================================
--- xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataOutputStream.java (original)
+++ xmlbeans/trunk/src/main/java/org/apache/xmlbeans/impl/util/LongUTFDataOutputStream.java Sat May 29 15:25:04 2021
@@ -24,17 +24,28 @@ import java.io.OutputStream;
  * and needs to be used with LongUTFDataInputStream
  */
 public class LongUTFDataOutputStream extends DataOutputStream {
-    /**
-     * Max. chunk size for string part to be output to DataOutputStream is 64kb
-     */
-    static final int DATA_OUTPUT_CHUNKS = 0x0000FFFF;
-
-    static final int LONG_UTF_MAGIC = 0xDA7A_DA7A;
+    // Despite its name this is 0xFFFE, one less than the real unsigned-short maximum (0xFFFF); the reduced value serves as the magic marker
+    static final int MAX_UNSIGNED_SHORT = Short.MAX_VALUE * 2;
 
     public LongUTFDataOutputStream(OutputStream out) {
         super(out);
     }
 
+    public void writeShortOrInt(int value) throws IOException {
+        writeShortOrInt(this, value);
+    }
+
+    public static void writeShortOrInt(DataOutputStream dos, int value) throws IOException {
+        // Two values (0xFFFE and 0xFFFF) are incompatible with the older writeShort-based
+        // implementation; this only matters when old schemas written via writeShort are processed
+        if (value < MAX_UNSIGNED_SHORT) {
+            dos.writeShort(value);
+        } else {
+            dos.writeShort(MAX_UNSIGNED_SHORT);
+            dos.writeInt(value);
+        }
+    }
+
     /**
      * Checks the length of the to-be-written UTF-8 array, if the length is below 64k then
      * {@link DataOutputStream#writeUTF(String)} is called, otherwise a 4-byte (int) is injected to list/count
@@ -44,14 +55,7 @@ public class LongUTFDataOutputStream ext
     public void writeLongUTF(String str) throws IOException {
         // DataOutputStream allows only 64k chunks - see XMLBeans-235
         final int utfLen = countUTF(str);
-        if (utfLen < DATA_OUTPUT_CHUNKS) {
-            writeUTF(str);
-            return;
-        }
-
-        writeShort(0xFFFF);
-        writeInt(LONG_UTF_MAGIC);
-        writeInt(utfLen);
+        writeShortOrInt(utfLen);
 
         final byte[] bytearr = new byte[4096];
         final int strlen = str.length();

Modified: xmlbeans/trunk/src/test/java/compile/scomp/detailed/LargeAnnotation.java
URL: http://svn.apache.org/viewvc/xmlbeans/trunk/src/test/java/compile/scomp/detailed/LargeAnnotation.java?rev=1890311&r1=1890310&r2=1890311&view=diff
==============================================================================
--- xmlbeans/trunk/src/test/java/compile/scomp/detailed/LargeAnnotation.java (original)
+++ xmlbeans/trunk/src/test/java/compile/scomp/detailed/LargeAnnotation.java Sat May 29 15:25:04 2021
@@ -55,12 +55,15 @@ public class LargeAnnotation {
             }
         }
 
-        targetStringLength = 0xFFFF;
+        // the real unsigned-short maximum is one higher (0xFFFF); 0xFFFE is used here as the magic number
+        final int MAX_SHORT = Short.MAX_VALUE*2;
+
+        targetStringLength = MAX_SHORT;
         try (ByteArrayOutputStream bos = new ByteArrayOutputStream(targetStringLength + 3)) {
             try (LongUTFDataOutputStream ldos = new LongUTFDataOutputStream(bos)) {
                 String exp;
                 {
-                    char[] chs = new char[0xFFFF];
+                    char[] chs = new char[MAX_SHORT-1];
                     Arrays.fill(chs, 'a');
                     exp = new String(chs);
                 }
@@ -76,10 +79,10 @@ public class LargeAnnotation {
 
             String exp;
             {
-                char[] chs = new char[0xFFFF];
+                char[] chs = new char[MAX_SHORT];
                 Arrays.fill(chs, 'a');
-                chs[0xFFFD] = '\u1234';
-                chs[0xFFFE] = '\u5678';
+                chs[MAX_SHORT-2] = '\u1234';
+                chs[MAX_SHORT-1] = '\u5678';
                 exp = new String(chs);
             }
 
@@ -103,7 +106,7 @@ public class LargeAnnotation {
 
 
     @Test
-    public void bug235() throws XmlException, IOException {
+    public void bug235and556() throws XmlException, IOException {
         ArrayList<XmlError> err = new ArrayList<>();
         XmlOptions xm_opt = new XmlOptions().setErrorListener(err);
         xm_opt.setSavePrettyPrint();

Modified: xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeAnnotation.xsd
URL: http://svn.apache.org/viewvc/xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeAnnotation.xsd?rev=1890311&r1=1890310&r2=1890311&view=diff
==============================================================================
--- xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeAnnotation.xsd (original)
+++ xmlbeans/trunk/src/test/resources/xbean/compile/scomp/largeAnnotation/largeAnnotation.xsd Sat May 29 15:25:04 2021
@@ -18,13 +18,13 @@
             xmlns:xsd="http://www.w3.org/2001/XMLSchema"
             xmlns="http://xmlbeans.apache.org/largeEnum">
 
-    <xsd:element name="root">
+    <xsd:element name="rootAnnon">
         <xsd:complexType>
-            <xsd:attribute name="nonsense" type="LargeEnum" use="required" />
+            <xsd:attribute name="nonsense" type="LargeEnum1" use="required" />
         </xsd:complexType>
     </xsd:element>
 
-    <xsd:simpleType name="LargeEnum">
+    <xsd:simpleType name="LargeEnum1">
     <xsd:annotation>
         <xsd:documentation>
             Lorem ipsum dolor sit amet, consectetur adipiscing elit. Vivamus nec mauris ac



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org