You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2014/11/03 23:17:23 UTC

svn commit: r1636459 - in /uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl: CasSerializerSupport.java TypeSystemImpl.java XmiCasSerializer.java

Author: schor
Date: Mon Nov  3 22:17:22 2014
New Revision: 1636459

URL: http://svn.apache.org/r1636459
Log:
[UIMA-4083] fixes for JSON serialization

Modified:
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasSerializerSupport.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
    uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasSerializerSupport.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasSerializerSupport.java?rev=1636459&r1=1636458&r2=1636459&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasSerializerSupport.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasSerializerSupport.java Mon Nov  3 22:17:22 2014
@@ -33,10 +33,9 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.CASRuntimeException;
 import org.apache.uima.cas.FSIndex;
 import org.apache.uima.cas.FeatureStructure;
-import org.apache.uima.internal.util.IntHashSet;
-import org.apache.uima.internal.util.IntStack;
 import org.apache.uima.internal.util.IntVector;
 import org.apache.uima.internal.util.PositiveIntSet;
+import org.apache.uima.internal.util.PositiveIntSet_impl;
 import org.apache.uima.internal.util.XmlElementName;
 import org.apache.uima.util.Level;
 import org.apache.uima.util.Logger;
@@ -214,7 +213,14 @@ public class CasSerializerSupport {
     
     abstract protected void writeView(int sofaAddr, int[] added, int[] deleted, int[] reindexed) throws Exception;  
     
-    abstract protected void writeFsStart(int addr, int typeCode) throws Exception;
+    /**
+     * 
+     * @param addr
+     * @param typeCode
+     * @return true if writing out referenced items (JSON)
+     * @throws Exception
+     */
+    abstract protected boolean writeFsStart(int addr, int typeCode) throws Exception;
     
     abstract protected void writeFs(int addr, int typeCode) throws Exception;
     
@@ -225,6 +231,8 @@ public class CasSerializerSupport {
     abstract protected void writeEndOfIndividualFs() throws Exception;  
     
     abstract protected void writeEndOfSerialization() throws Exception;
+    
+    abstract protected void writeFsRef(int addr) throws Exception;
   }
   
   /**
@@ -246,13 +254,13 @@ public class CasSerializerSupport {
      *  Computed during "enqueue" phase, prior to encoding
      *  Used to prevent duplicate enqueuing
      */    
-    public final PositiveIntSet visited_not_yet_written; 
+    public final PositiveIntSet_impl visited_not_yet_written; 
     
     /**
      * set of FSs that have multiple references
      * This is for JSON which is computing the multi-refs, not depending on the setting in a feature.
      */
-    public final IntHashSet multiRefFSs;
+    public final PositiveIntSet multiRefFSs;
     
     /* *********************************************
      * FSs that need to be serialized because they're 
@@ -271,7 +279,7 @@ public class CasSerializerSupport {
     public final IntVector[] indexedFSs;
 
     // only referenced FSs.
-    private final IntStack queue;
+    private final IntVector queue;
 
     
     // utilities for dealing with CAS list types
@@ -353,8 +361,8 @@ public class CasSerializerSupport {
       eh = CasSerializerSupport.this.eh;
 
       tsi = cas.getTypeSystemImpl();
-      visited_not_yet_written = new PositiveIntSet();
-      queue = new IntStack();
+      visited_not_yet_written = new PositiveIntSet_impl();
+      queue = new IntVector();
       indexedFSs = new IntVector[cas.getBaseSofaCount()];  // number of views
       listUtils = new ListUtils(cas, logger, eh);
       typeUsed = new BitSet();
@@ -366,7 +374,7 @@ public class CasSerializerSupport {
     	  throw exception;
       }
       isDelta = marker != null;
-      multiRefFSs = (trackMultiRefs) ? new IntHashSet() : null;
+      multiRefFSs = (trackMultiRefs) ? new PositiveIntSet_impl() : null;
     }
         
     // TODO: internationalize
@@ -661,7 +669,10 @@ public class CasSerializerSupport {
       if (!visited_not_yet_written.add(addr)) {
         // was already visited; means this FS has multiple references, either from FS feature(s) or indexes or both
         if (null != multiRefFSs) {
-          multiRefFSs.add(addr);
+          boolean wasAdded = multiRefFSs.add(addr);
+          if (wasAdded) {
+            queue.add(addr);  // if was in indexed set before, isn't in the queue set, but needs to be
+          }
         }
         return -1;
       }
@@ -708,7 +719,7 @@ public class CasSerializerSupport {
       if (typeCode == -1) {
         return;  
       }
-      queue.push(addr);
+      queue.add(addr);
       enqueueFeatures(addr, typeCode);
       // Also, for FSArrays enqueue the elements
       if (cas.isFSArrayType(typeCode)) { //TODO: won't get parameterized arrays??
@@ -757,9 +768,9 @@ public class CasSerializerSupport {
       int tailFeat = listUtils.getTailFeatCode(typeCode);
       boolean foundCycle = false;
       int curNode = listNode;
-      if (listNode == 14284) { // debug
-        System.out.println(listNode); //debug
-      }
+//      if (listNode == 14284) { // debug
+//        System.out.println(listNode); //debug
+//      }
       while (typeCode == neListType) {  // stop on end or 0
         if (!visited_not_yet_written.add(curNode)) {
           foundCycle = true;
@@ -796,9 +807,7 @@ public class CasSerializerSupport {
       
       // doing dynamic determination of multi-refs
       if (alreadyVisited) {
-        // if already enqueued, 
-        multiRefFSs.add(featVal); // mark as multi-ref'd
-        return false; // already enqueued, skip, prevent loops
+        return !multiRefFSs.contains(featVal); // enqueue in the "queue" section, first time this happens
       };
       return true;  // enqueue this item.  May or may not be eventually written embedded
                     // but we enqueue to track multi-use
@@ -815,6 +824,28 @@ public class CasSerializerSupport {
      *          true iff the enclosing FS (addr) is a list type
      */
     private void enqueueFeatures(int addr, int typeCode) throws SAXException {
+      
+      /**
+       * Handle FSArrays
+       */
+      if (typeCode == tsi.fsArrayTypeCode) {
+        final int array_size = cas.ll_getArraySize(addr);
+        int position = cas.getArrayStartAddress(addr);
+        
+        for (int j = 0; j < array_size; j++) {
+          final int fsRef = cas.getHeapValue(position++);
+          if (isFiltering) {
+            String typeName = tsi.ll_getTypeForCode(cas.getHeapValue(fsRef)).getName();
+            if (filterTypeSystem.getType(typeName) == null) {
+              continue;  // don't enqueue this type because it's filtered out
+            }
+          }
+          enqueue(fsRef);  
+        }
+        return;
+      }
+      
+      
       boolean insideListNode = listUtils.isListType(typeCode);
       int[] feats = tsi.ll_getAppropriateFeatures(typeCode);
       for (int feat : feats) {
@@ -856,8 +887,8 @@ public class CasSerializerSupport {
             //   unless already enqueued, in order to pick up any multiple refs
             final boolean alreadyVisited = visited_not_yet_written.contains(featVal);
             if (isMultiRef_enqueue(feat, featVal, alreadyVisited, false, false)) {
-              enqueue(featVal);
-            // otherwise, it is singly referenced and will be embedded
+              enqueue(featVal);  // will add to queue list 1st time multi-ref detected
+            // otherwise, it is singly referenced (so far) and will be embedded
             //   (or has already been enqueued, in dynamic embedding mode), so don't enqueue
             } else if (fsClass == LowLevelCAS.TYPE_CLASS_FSARRAY && !alreadyVisited) {
               // enqueue any FSs reachable from an FSArray
@@ -959,14 +990,16 @@ public class CasSerializerSupport {
      *   later).  The isWritten test prevents dupl writes
      */
     public void encodeQueued() throws Exception {
-      final int len = queue.size();
-      for (int i = 0; i < len; i++) {
-        final int addr = queue.get(i);
+      int[] queueArray = queue.toArray();
+      for (int addr : queueArray) {
         // for some serializers, things could be enqueued multiple times in the ref queue
         // so check if already written, and if so, skip
         //    Case where this happens: JSON serialization with dynamically determined single ref embedding
         //    - have to enqueue to check if multiple refs, even if embedding eventually
-        if (visited_not_yet_written.contains(addr)) { 
+        if (visited_not_yet_written.contains(addr)) {
+          if (null != multiRefFSs && !multiRefFSs.contains(addr)) {
+            continue;  // skip writing embeddable item (for JSON dynamic embedding) from Q; will be written from reference
+          }
           encodeFS(addr);
         }
       }
@@ -1055,39 +1088,43 @@ public class CasSerializerSupport {
       final int typeCode = cas.getHeapValue(addr);
 
       final int typeClass = classifyType(typeCode);
-      visited_not_yet_written.remove(addr);  // mark as written
-      csss.writeFsStart(addr, typeCode);
-
-      switch (typeClass) {
-        case LowLevelCAS.TYPE_CLASS_FS: 
-          csss.writeFs(addr, typeCode);
-          break;
-        
+      boolean isIndexId = csss.writeFsStart(addr, typeCode);
+      
+      if (!isIndexId && multiRefFSs != null && multiRefFSs.contains(addr)) {
+        csss.writeFsRef(addr);        
+      } else {
+        visited_not_yet_written.remove(addr);  // mark as written
+        switch (typeClass) {
+          case LowLevelCAS.TYPE_CLASS_FS: 
+            csss.writeFs(addr, typeCode);
+            break;
           
-        case TYPE_CLASS_INTLIST:
-        case TYPE_CLASS_FLOATLIST:
-        case TYPE_CLASS_STRINGLIST:
-        case TYPE_CLASS_FSLIST: 
-          csss.writeListsAsIndividualFSs(addr, typeCode);
-          break;
-                
-        case LowLevelCAS.TYPE_CLASS_FSARRAY:
-        case LowLevelCAS.TYPE_CLASS_INTARRAY:
-        case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
-        case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
-        case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
-        case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
-        case LowLevelCAS.TYPE_CLASS_LONGARRAY:
-        case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
-        case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
-          csss.writeArrays(addr, typeCode, typeClass);
-          break;
+            
+          case TYPE_CLASS_INTLIST:
+          case TYPE_CLASS_FLOATLIST:
+          case TYPE_CLASS_STRINGLIST:
+          case TYPE_CLASS_FSLIST: 
+            csss.writeListsAsIndividualFSs(addr, typeCode);
+            break;
+                  
+          case LowLevelCAS.TYPE_CLASS_FSARRAY:
+          case LowLevelCAS.TYPE_CLASS_INTARRAY:
+          case LowLevelCAS.TYPE_CLASS_FLOATARRAY:
+          case LowLevelCAS.TYPE_CLASS_BOOLEANARRAY:
+          case LowLevelCAS.TYPE_CLASS_BYTEARRAY:
+          case LowLevelCAS.TYPE_CLASS_SHORTARRAY:
+          case LowLevelCAS.TYPE_CLASS_LONGARRAY:
+          case LowLevelCAS.TYPE_CLASS_DOUBLEARRAY:
+          case LowLevelCAS.TYPE_CLASS_STRINGARRAY:
+            csss.writeArrays(addr, typeCode, typeClass);
+            break;
+          
+          default: 
+            throw new RuntimeException("Error classifying FS type.");
+        }
         
-        default: 
-          throw new RuntimeException("Error classifying FS type.");
+        csss.writeEndOfIndividualFs();
       }
-      
-      csss.writeEndOfIndividualFs();
     }
     
     int filterType(int addr) {

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java?rev=1636459&r1=1636458&r2=1636459&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java Mon Nov  3 22:17:22 2014
@@ -316,7 +316,7 @@ public class TypeSystemImpl implements T
 
   public int sofaNumFeatCode = LowLevelTypeSystem.UNKNOWN_TYPE_CODE;  // ref from another pkg
 
-  int sofaIdFeatCode = LowLevelTypeSystem.UNKNOWN_TYPE_CODE;
+  public int sofaIdFeatCode = LowLevelTypeSystem.UNKNOWN_TYPE_CODE;
 
   int sofaMimeFeatCode = LowLevelTypeSystem.UNKNOWN_TYPE_CODE;
 

Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java?rev=1636459&r1=1636458&r2=1636459&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java Mon Nov  3 22:17:22 2014
@@ -1227,9 +1227,10 @@ public class XmiCasSerializer {
     protected void addNameSpace(XmlElementName xmlElementName) {};
 
     @Override
-    protected void writeFsStart(int addr, int typeCode /* ignored */) {
+    protected boolean writeFsStart(int addr, int typeCode /* ignored */) {
       workAttrs.clear();
       addAttribute(workAttrs, ID_ATTR_NAME, cds.getXmiId(addr));
+      return false;  // ignored
     }
    
     /**
@@ -1280,6 +1281,10 @@ public class XmiCasSerializer {
 
     @Override
     protected void writeEndOfIndividualFs() {}
+
+    @Override
+    protected void writeFsRef(int addr) throws Exception {} // only for JSON, not used here
+     
   }
         
 //  // for testing