You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2016/07/22 11:33:55 UTC

svn commit: r1753775 - in /uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima: cas/impl/CommonSerDes.java util/CasIOUtils.java

Author: pkluegl
Date: Fri Jul 22 11:33:55 2016
New Revision: 1753775

URL: http://svn.apache.org/viewvc?rev=1753775&view=rev
Log:
UIMA-4685
- reuse Header
- extend Header with typesystem inclusion info

Modified:
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java?rev=1753775&r1=1753774&r2=1753775&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java Fri Jul 22 11:33:55 2016
@@ -48,6 +48,7 @@ public class CommonSerDes {
    *     - bit in 0x01 position: on for binary non-delta (redundant)   
    *     - bit in 0x02 position: on means delta, off - not delta
    *     - bit in 0x04 position: on means compressed, off means plain binary
+   *     - bit in 0x08 position: on means type system included
    *     - bits  0xF8 reserved
    *     
    *     - byte in 0xFF 00 position: incrementing (starting w/ 0) version
@@ -63,12 +64,13 @@ public class CommonSerDes {
    *     - bit in 0x01 position: on means form6, off = form 4 
    *********************************************/
   
-  static class Header {
+  public static class Header {
     boolean isDelta;
     boolean isCompressed;
     boolean isV3style;
     boolean form4;
     boolean form6;
+    boolean typeSystemIncluded;
     byte seqVersionNbr;
     boolean isV3;
     boolean swap;
@@ -77,21 +79,24 @@ public class CommonSerDes {
     
     Reading reading;
     
-    Header delta() {isDelta = true;  return this; }
-    Header delta(boolean v2) {isDelta = v2;  return this; }
-    Header form4() {isCompressed = form4 = true; form6 = false; return this; }
-    Header form6() {isCompressed = form6 = true; form4 = false; return this; }
-    Header seqVer(int v2) { assert (v2 >= 0 && v2 < 256); seqVersionNbr = (byte)v2; return this; }
-    Header v3() {isV3 = true; return this; }
+    public Header delta() {isDelta = true;  return this; }
+    public Header delta(boolean v2) {isDelta = v2;  return this; }
+    public Header form4() {isCompressed = form4 = true; form6 = false; return this; }
+    public Header form6() {isCompressed = form6 = true; form4 = false; return this; }
+    public Header typeSystemIncluded() {typeSystemIncluded = true; return this; }
+    public Header seqVer(int v2) { assert (v2 >= 0 && v2 < 256); seqVersionNbr = (byte)v2; return this; }
+    public Header v3() {isV3 = true; return this; }
     
     
-    void write(DataOutputStream dos) throws IOException {
+    public void write(DataOutputStream dos) throws IOException {
       v = (!isCompressed && !isDelta) ? 1 : 0;
       if (isDelta) v |= 0x02;
       if (isCompressed) v |= 0x04;
+      if (typeSystemIncluded) v |= 0x08;
       v |= (seqVersionNbr << 8);
       if (isV3) v |= 0x010000;
       
+      
       byte[] uima = new byte[4];
       uima[0] = 85; // U
       uima[1] = 73; // I
@@ -107,15 +112,43 @@ public class CommonSerDes {
       if (isCompressed) {
         dos.writeInt(form6 ? 1 : 0);
       }
+      
+    }
+    
+    public boolean isDelta() {
+      return isDelta;
+    }
+    public boolean isCompressed() {
+      return isCompressed;
+    }
+    public boolean isV3style() {
+      return isV3style;
+    }
+    public boolean isForm4() {
+      return form4;
     }
+    public boolean isForm6() {
+      return form6;
+    }
+    public boolean isTypeSystemIncluded() {
+      return typeSystemIncluded;
+    }
+    public byte getSeqVersionNbr() {
+      return seqVersionNbr;
+    }
+    public boolean isV3() {
+      return isV3;
+    }
+
+    
   }
   
-  static Header createHeader() {
+  public static Header createHeader() {
     return new Header();
   }
   
   
-  static Header readHeader(DataInputStream dis) throws IOException {
+  public static Header readHeader(DataInputStream dis) throws IOException {
 
     Header h = new Header();
     // key
@@ -134,6 +167,7 @@ public class CommonSerDes {
     
     h.isDelta = (v & 2) != 0;
     h.isCompressed = (v & 4) != 0;
+    h.typeSystemIncluded = (v & 8) != 0;
     h.seqVersionNbr = (byte) ((v & 0xFF00) >> 8);
    
     if (h.isCompressed) {
@@ -145,14 +179,14 @@ public class CommonSerDes {
     return h;
   }
 
-  static DataOutputStream maybeWrapToDataOutputStream(OutputStream os) {
+  public static DataOutputStream maybeWrapToDataOutputStream(OutputStream os) {
     if (os instanceof DataOutputStream) {
       return (DataOutputStream) os;
     }
     return new DataOutputStream(os);
   }
   
-  static DataInputStream maybeWrapToDataInputStream(InputStream os) {
+  public static DataInputStream maybeWrapToDataInputStream(InputStream os) {
     if (os instanceof DataInputStream) {
       return (DataInputStream) os;
     }
@@ -163,7 +197,7 @@ public class CommonSerDes {
    * byte swapping reads of integer forms
    */
  
-  static class Reading {
+  public static class Reading {
     final DataInputStream dis;
     final boolean swap;
     

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java?rev=1753775&r1=1753774&r2=1753775&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java Fri Jul 22 11:33:55 2016
@@ -36,7 +36,6 @@ import java.io.ObjectOutputStream;
 import java.io.OutputStream;
 import java.net.URL;
 import java.nio.file.Path;
-import java.util.Arrays;
 
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.SerialFormat;
@@ -45,6 +44,7 @@ import org.apache.uima.cas.impl.CASImpl;
 import org.apache.uima.cas.impl.CASMgrSerializer;
 import org.apache.uima.cas.impl.CASSerializer;
 import org.apache.uima.cas.impl.CommonSerDes;
+import org.apache.uima.cas.impl.CommonSerDes.Header;
 import org.apache.uima.cas.impl.TypeSystemImpl;
 import org.apache.uima.cas.impl.XCASDeserializer;
 import org.apache.uima.cas.impl.XCASSerializer;
@@ -55,10 +55,6 @@ import org.xml.sax.SAXException;
 
 public class CasIOUtils {
 
-  public static final byte[] UIMA_TS_HEADER = new byte[] { 'U', 'I', 'M', 'A', 'T', 'S' };
-
-  public static final byte[] UIMA_HEADER = new byte[] { 'U', 'I', 'M', 'A' };
-
   /**
    * 
    * @param casPath
@@ -199,8 +195,8 @@ public class CasIOUtils {
    *          ignore feature structures of non-existing types
    * @throws IOException
    */
-  public static SerialFormat load(InputStream casInputStream, InputStream tsInputStream,
-          CAS aCAS, boolean lentiently) throws IOException {
+  public static SerialFormat load(InputStream casInputStream, InputStream tsInputStream, CAS aCAS,
+          boolean lentiently) throws IOException {
     BufferedInputStream bis = new BufferedInputStream(casInputStream);
     bis.mark(32);
     byte[] headerXml = new byte[16];
@@ -212,7 +208,8 @@ public class CasIOUtils {
         XmiCasDeserializer.deserialize(bis, aCAS, lentiently);
         return SerialFormat.XMI;
       } catch (SAXException e) {
-        throw new IOException(e);
+        throw new IllegalArgumentException(
+                "Error parsing XMI file. XCAS format not supported for InputStream. Please use File, Path or URL interface.");
       }
     }
     return loadBinary(bis, tsInputStream, aCAS);
@@ -269,76 +266,49 @@ public class CasIOUtils {
   public static SerialFormat loadBinary(InputStream is, CASMgrSerializer casMgr, CAS aCAS)
           throws IOException {
     try {
-      BufferedInputStream bis = new BufferedInputStream(is);
       TypeSystemImpl ts = null;
-
+      DataInputStream dis = CommonSerDes.maybeWrapToDataInputStream(is);
+      Header header = CommonSerDes.readHeader(dis);
+      dis.reset();
       // Check if this is original UIMA CAS format or an extended format with type system
-      bis.mark(32);
-      DataInputStream dis = new DataInputStream(bis);
-
-      byte[] header = new byte[UIMA_TS_HEADER.length];
-      dis.read(header);
 
       // If it is UIMA with type system format, read the type system
-      if (Arrays.equals(header, UIMA_TS_HEADER)) {
-        ObjectInputStream ois = new ObjectInputStream(bis);
+      if (header.isForm6() && header.isTypeSystemIncluded()) {
+        // read additional header again
+        CommonSerDes.readHeader(dis);
+        ObjectInputStream ois = new ObjectInputStream(dis);
         CASMgrSerializer casMgrSerializer = (CASMgrSerializer) ois.readObject();
         ts = casMgrSerializer.getTypeSystem();
         ts.commit();
-      } else {
-        bis.reset();
       }
 
       if (ts != null) {
         // Only format 6 can have type system information
-        deserializeCAS(aCAS, bis, ts, null);
+        deserializeCAS(aCAS, dis, ts, null);
         return SerialFormat.COMPRESSED_FILTERED_TS;
       } else {
 
         // Check if this is a UIMA binary CAS stream
-        byte[] header4 = new byte[UIMA_HEADER.length];
-        dis.read(header4);
-
-        if (header4[0] != 'U') {
-          // ArrayUtils.reverse(header4);
-          for (int i = 0; i < header4.length / 2; i++) {
-            byte temp = header4[i];
-            header4[i] = header4[header4.length - i - 1];
-            header4[header4.length - i - 1] = temp;
-          }
-        }
-
-        // Peek into the version
-        int version = dis.readInt();
-        int version1 = dis.readInt();
-        bis.reset();
-
-        if (Arrays.equals(header4, UIMA_HEADER)) {
-          // It is a binary CAS stream
-
-          if ((version & 4) == 4 && (version1 != 0)) {
-            // This is a form 6
-            if (ts == null && casMgr != null) {
-              // If there was not type system in the file but one is set, then load it
-              ts = casMgr.getTypeSystem();
-              ts.commit();
-            }
-            deserializeCAS(aCAS, bis, ts, null);
-            return SerialFormat.COMPRESSED_FILTERED;
-          } else {
-            // This is a form 0 or 4
-            deserializeCAS(aCAS, bis);
-            if (version == 4) {
-              return SerialFormat.COMPRESSED;
-            }
-            return SerialFormat.BINARY;
+        if (header.isForm4()) {
+          deserializeCAS(aCAS, dis);
+          return SerialFormat.COMPRESSED;
+        } else if (header.isForm6()) {
+          if (ts == null && casMgr != null) {
+            // If there was no type system in the file but one is set, then load it
+            ts = casMgr.getTypeSystem();
+            ts.commit();
           }
+          deserializeCAS(aCAS, dis, ts, null);
+          return SerialFormat.COMPRESSED_FILTERED;
+        } else if (header.getSeqVersionNbr() == 1) {
+          deserializeCAS(aCAS, dis);
+          return SerialFormat.BINARY;
         } else {
-          // If it is not a UIMA binary CAS stream and not xml, assume it is output from
-          // SerializedCasWriter
-          ObjectInputStream ois = new ObjectInputStream(bis);
+          // read additional header again
+          CommonSerDes.readHeader(dis);
+          ObjectInputStream ois = new ObjectInputStream(dis);
           Object object = ois.readObject();
-          if (object instanceof CASCompleteSerializer) {
+          if (object instanceof CASCompleteSerializer && header.isTypeSystemIncluded()) {
             CASCompleteSerializer serializer = (CASCompleteSerializer) object;
             deserializeCASComplete(serializer, (CASImpl) aCAS);
             return SerialFormat.SERIALIZED_TS;
@@ -361,6 +331,7 @@ public class CasIOUtils {
                     + object.getClass().getName() + "]");
           }
         }
+
       }
     } catch (ResourceInitializationException e) {
       throw new IOException(e);
@@ -371,7 +342,6 @@ public class CasIOUtils {
         is.close();
       }
     }
-
   }
 
   /**
@@ -401,8 +371,7 @@ public class CasIOUtils {
    *          The SerialFormat in which the CAS should be stored.
    * @throws IOException
    */
-  public static void save(CAS aCas, OutputStream docOS, SerialFormat format)
-          throws IOException {
+  public static void save(CAS aCas, OutputStream docOS, SerialFormat format) throws IOException {
     save(aCas, docOS, null, format);
   }
 
@@ -422,9 +391,10 @@ public class CasIOUtils {
    *          The SerialFormat in which the CAS should be stored.
    * @throws IOException
    */
-  public static void save(CAS aCas, OutputStream docOS, OutputStream typeOS,
-          SerialFormat format) throws IOException {
+  public static void save(CAS aCas, OutputStream docOS, OutputStream typeOS, SerialFormat format)
+          throws IOException {
     boolean typeSystemWritten = false;
+    DataOutputStream dos = CommonSerDes.maybeWrapToDataOutputStream(docOS);
     try {
       switch (format) {
         case XMI:
@@ -438,6 +408,8 @@ public class CasIOUtils {
         case SERIALIZED:
         // Java-serialized CAS without type system
         {
+          Header additionalHeader = CommonSerDes.createHeader();
+          additionalHeader.write(dos);
           CASSerializer serializer = new CASSerializer();
           serializer.addCAS((CASImpl) aCas);
           ObjectOutputStream objOS = new ObjectOutputStream(docOS);
@@ -448,6 +420,8 @@ public class CasIOUtils {
         case SERIALIZED_TS:
         // Java-serialized CAS with type system
         {
+          Header additionalHeader = CommonSerDes.createHeader().typeSystemIncluded();
+          additionalHeader.write(dos);
           ObjectOutputStream objOS = new ObjectOutputStream(docOS);
           CASCompleteSerializer serializer = serializeCASComplete((CASImpl) aCas);
           objOS.writeObject(serializer);
@@ -463,7 +437,6 @@ public class CasIOUtils {
           // Binary compressed CAS without type system (form 4)
           serializeWithCompression(aCas, docOS);
           break;
-
         case COMPRESSED_FILTERED:
           // Binary compressed CAS (form 6)
           serializeWithCompression(aCas, docOS, aCas.getTypeSystem());
@@ -471,7 +444,8 @@ public class CasIOUtils {
         case COMPRESSED_FILTERED_TS:
           // Binary compressed CAS (form 6)
           // ... with embedded Java-serialized type system
-          writeHeader(docOS);
+          Header additionalHeader = CommonSerDes.createHeader().form6().typeSystemIncluded();
+          additionalHeader.write(dos);
           writeTypeSystem(aCas, docOS);
           typeSystemWritten = true; // Embedded type system
           serializeWithCompression(aCas, docOS, aCas.getTypeSystem());
@@ -506,12 +480,6 @@ public class CasIOUtils {
     return casMgrSerializer;
   }
 
-  private static void writeHeader(OutputStream aOS) throws IOException {
-    DataOutputStream dataOS = new DataOutputStream(aOS);
-    dataOS.write(UIMA_TS_HEADER);
-    dataOS.flush();
-  }
-
   private static void writeTypeSystem(CAS aCas, OutputStream aOS) throws IOException {
     ObjectOutputStream typeOS = new ObjectOutputStream(aOS);
     CASMgrSerializer casMgrSerializer = serializeCASMgr((CASImpl) aCas);