You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2016/09/07 21:33:09 UTC

svn commit: r1759710 - in /uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl: BinaryCasSerDes.java CASSerializer.java CommonSerDesSequential.java

Author: schor
Date: Wed Sep  7 21:33:09 2016
New Revision: 1759710

URL: http://svn.apache.org/viewvc?rev=1759710&view=rev
Log:
[UIMA-4663] clarify purpose of values stored in BinaryCasSerDes (only for plain binary) vs CommonSerDesSequential, and model the aux arrays for deserialization for binary.

Modified:
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java?rev=1759710&r1=1759709&r2=1759710&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java Wed Sep  7 21:33:09 2016
@@ -64,10 +64,17 @@ import org.apache.uima.util.CasLoadMode;
  * Binary (mostly non compressed) CAS deserialization
  * The methods in this class were originally part of the CASImpl, and were moved here to this class for v3
  * 
- * Binary non compressed CAS serialization is in class CASSerializer, but that class uses routine and data structures
+ * Binary non compressed CAS serialization is in class CASSerializer, but that class uses routines and data structures
  * in this class.
  * 
- * There is one instance of this class per CAS (shared by all views of that CAS).
+ * There is one instance of this class per CAS (shared by all views of that CAS), created at the same time the 
+ * CAS is created. 
+ * 
+ * This instance also holds data needed for binary serialization, and deserialization.
+ * For binary delta deserialization, it uses the data computed on a previous serialization, 
+ * or, if none, it re-computes it.  See scanAllFSsForBinarySerialization method.
+ * 
+ *   The data is computed lazily, and reset with cas reset.
  * 
  * Lifecycle: 
  *   created when a CAS (any view) is first created, as part of the shared view data for that CAS.
@@ -75,8 +82,6 @@ import org.apache.uima.util.CasLoadMode;
  *   
  *   Data created when non-delta serializing, in case needed when delta-deserializing later:
  *     xxxAuxAddr2fsa maps aux arrays to FSs
- *     
- *   Data created when non-delta deserializing, in case needed when delta-serializing later:
  *     heaps and nextXXXHeapAddrAfterMark (in this case mark is the end).
  *     
  *   Reset: 
@@ -176,10 +181,13 @@ public class BinaryCasSerDes {
 
   /**
    * Map from an aux addr starting address for an array of boolean/byte/short/long/double to the V3 FS.
+   *   key = simulated starting address in aux heap for the array
+   *   value = FS having that array
    * When deserializing a modification, used to find the v3 FS and the offset in the array to modify.
    * 
-   * created when serializing (in case receive delta deser back)
-   * updated when delta deserializing 
+   * created when serializing (in case receive delta deser back).
+   * created when delta deserializing if not available from previous serialization.
+   * updated when delta deserializing.
    * reset at end of delta deserializings because multiple mods not supported
    */
   final private Int2ObjHashMap<TOP> byteAuxAddr2fsa = new Int2ObjHashMap<>(TOP.class);  
@@ -523,7 +531,7 @@ public class BinaryCasSerDes {
       
       if (h.isCompressed) {
         if (TRACE_DESER) {
-          System.out.format("BinDeser version = %d%n", h.v);
+          System.out.format("BinDeser version = %d%n", Integer.valueOf(h.v));
         }
         if (h.form4) {
           (new BinaryCasSerDes4(baseCas.getTypeSystemImpl(), false))
@@ -603,7 +611,7 @@ public class BinaryCasSerDes {
    *     CAS(2) has updates - new FSs, and mods to existing ones
    *   CAS(2) -> delta binary ser -> delta binary deser -> CAS(1).
    *   
-   * V3 supports the above scenario by retaining information in CAS(2) at the
+   * V3 supports the above scenario by retaining some information in CAS(2) at the
    * end of the initial deserialization, including the model heap size/cellsUsed.
    *   - this is needed to properly do a compatible-with-v2 delta serialization.    
 
@@ -616,13 +624,12 @@ public class BinaryCasSerDes {
    * 
    * This method assumes a previous binary serialization was done, and the following data structures
    * are still valid (i.e. no CAS altering operations have been done)
-   *   (maybe these are reset: heap, stringHeap, byteHeap, shortHeap, longHeap)
-   *   csds, [string/byte/short/long]auxAddr2fs (for array mods)
+   *   (these are reset: heap, stringHeap, byteHeap, shortHeap, longHeap)
+   *   csds, 
+   *   [string/byte/short/long]auxAddr2fs (for array mods)
    *   nextHeapAddrAfterMark, next[string/byte/short/long]HeapAddrAfterMark
    *  
-   * @param dis data input stream
-   * @param swap true if byte order needs swapping
-   * @param delta true if delta binary deserialization being received
+   * @param h the Header (read by the caller)
    * @return the format of the incoming serialized data
    */
   private SerialFormat binaryDeserialization(Header h) {
@@ -664,7 +671,7 @@ public class BinaryCasSerDes {
       }
       if (TRACE_DESER) {
         System.out.format("BinDes Plain %s startPos: %,d mainHeapSize: %d%n", 
-            delta ? "Delta" : "", startPos, fsheapsz);
+            delta ? "Delta" : "", Integer.valueOf(startPos), Integer.valueOf(fsheapsz));
       }
             
       // add new heap slots
@@ -739,7 +746,7 @@ public class BinaryCasSerDes {
           modWords[i] = r.readInt();
         }
         if (TRACE_DESER) {
-          System.out.format("BinDes modified heap slot count: %,d%n", fsmodssz2 / 2);
+          System.out.format("BinDes modified heap slot count: %,d%n", Integer.valueOf(fsmodssz2 / 2));
         }
       } else {
         fsmodssz2 = 0; // not used but must be set to make "final" work
@@ -750,19 +757,19 @@ public class BinaryCasSerDes {
       // indexed FSs
       int fsindexsz = r.readInt();
       int[] fsindexes = new int[fsindexsz];
-      if (TRACE_DESER) System.out.format("BinDes indexedFSs count: %,d%n", fsindexsz);
+      if (TRACE_DESER) System.out.format("BinDes indexedFSs count: %,d%n", Integer.valueOf(fsindexsz));
       for (int i = 0; i < fsindexsz; i++) {
         fsindexes[i] = r.readInt();
         if (TRACE_DESER) {
-          if (i % 5 == 0) System.out.format("%n i: %5d ", i);
-          System.out.format("%15d ", fsindexes[i]);
+          if (i % 5 == 0) System.out.format("%n i: %5d ", Integer.valueOf(i));
+          System.out.format("%15d ", Integer.valueOf(fsindexes[i]));
         }
       }
       if (TRACE_DESER) System.out.println("");
 
       // byte heap
       int heapsz = r.readInt();
-      if (TRACE_DESER) System.out.format("BinDes ByteHeap size: %,d%n", heapsz);
+      if (TRACE_DESER) System.out.format("BinDes ByteHeap size: %,d%n", Integer.valueOf(heapsz));
       
       if (!delta) {
         byteHeap.heap = new byte[Math.max(16, heapsz)]; // must be > 0
@@ -778,7 +785,7 @@ public class BinaryCasSerDes {
 
       // short heap
       heapsz = r.readInt();
-      if (TRACE_DESER) System.out.format("BinDes ShortHeap size: %,d%n", heapsz);
+      if (TRACE_DESER) System.out.format("BinDes ShortHeap size: %,d%n", Integer.valueOf(heapsz));
       
       if (!delta) {
         shortHeap.heap = new short[Math.max(16, heapsz)]; // must be > 0
@@ -800,7 +807,7 @@ public class BinaryCasSerDes {
 
       // long heap
       heapsz = r.readInt();
-      if (TRACE_DESER) System.out.format("BinDes LongHeap size: %,d%n", heapsz);
+      if (TRACE_DESER) System.out.format("BinDes LongHeap size: %,d%n", Integer.valueOf(heapsz));
       
       if (!delta) {
         longHeap.heap = new long[Math.max(16, heapsz)]; // must be > 0
@@ -927,10 +934,7 @@ public class BinaryCasSerDes {
       longHeap = null;
       
       // cleared because only used for delta deser, for mods, and mods not allowed for multiple deltas
-      byteAuxAddr2fsa.clear();
-      shortAuxAddr2fsa.clear();
-      longAuxAddr2fsa.clear();     
-
+      clearAuxAddr2fsa();
     } catch (IOException e) {
       String msg = e.getMessage();
       if (msg == null) {
@@ -1323,18 +1327,20 @@ public class BinaryCasSerDes {
 
   
   /**
-   * Called when serializing a cas.
+   * Called when serializing a cas, or deserializing a delta CAS, if not saved in that case from a previous 
+   * binary serialization (in that case, the scan is done as if it is doing a non-delta serialization).
    * 
    * Initialize the serialization model for binary serialization in CASSerializer from a CAS
    * Do 2 scans, each by walking all the reachable FSs
    *   - The first one processes all fs (including for delta, those below the line)
    *      -- computes the fs to addr map and its inverse, based on the size of each FS.
+   *      -- done by CommonSerDesSequential class's "setup" method
    *      
    *   - The second one computes the values of the main and aux heaps and string heaps except for delta mods
    *      -- for delta, the heaps only have "new" values that binary serialization will write out as arrays
    *         --- mods are computed from FsChange info and added to the appropriate heaps, later  
    *
-   *         - for byte/short/long/string array use, compute auxAddr2fsa map. 
+   *         - for byte/short/long/string array use, compute auxAddr2fsa maps. 
    *           This is used when deserializing delta mod info, to locate the fs to update
    * 
    * For delta serialization, the heaps are populated only with the new values.
@@ -1350,16 +1356,20 @@ public class BinaryCasSerDes {
    * @param cs the CASSerializer instance used to record the results of the scan
    * @param mark null or the mark to use for separating the new from from the previously existing 
    *        used by delta cas.
+   * @return null when no mark is set; for delta (mark set), the list returned by csds.setup
    */
-  void scanAllFSsForBinarySerialization(MarkerImpl mark, CommonSerDesSequential csds) {
+  List<TOP> scanAllFSsForBinarySerialization(MarkerImpl mark, CommonSerDesSequential csds) {
     final boolean isMarkSet = mark != null;
 
+    List<TOP> all = null;
+    int prevHeapEnd = csds.getHeapEnd();  // used if mark is set
     if (isMarkSet) {
-      csds.setup(mark, csds.getHeapEnd());   // add new stuff to existing csds
-    }  // otherwise, it's set up using null, 1 as the arguments
+      
+      all = csds.setup(mark, csds.getHeapEnd());   // add new stuff to existing csds
+    }  // otherwise, it's set up already, using null, 1 as the arguments, when getCsds() is called
         
     // For delta, these heaps will start at 1, and only hold new items
-    heap = new Heap(csds.getHeapEnd());
+    heap = new Heap(isMarkSet ? (1 + csds.getHeapEnd() - prevHeapEnd) : csds.getHeapEnd());  
     byteHeap = new ByteHeap();
     shortHeap = new ShortHeap();
     longHeap = new LongHeap();
@@ -1367,18 +1377,31 @@ public class BinaryCasSerDes {
     
     if (!isMarkSet) {
       clearDeltaOffsets();  // set nextXXheapAfterMark to 0;
+      clearAuxAddr2fsa();
     }
 
-    List<TOP> itemsToExtract = isMarkSet ? CASImpl.filterAboveMark(csds.getSortedFSs(), mark) : csds.getSortedFSs();
+    List<TOP> itemsToExtract = csds.getSortedFSs(); 
+//        isMarkSet ? CASImpl.filterAboveMark(csds.getSortedFSs(), mark) : csds.getSortedFSs();
     for (TOP fs : itemsToExtract) {
       if (!isMarkSet || mark.isNew(fs)) {
         // skip extraction for FSs below the mark. 
         //   - updated slots will update aux heaps when delta mods are processed
         extractFsToV2Heaps(fs, isMarkSet, csds.fs2addr);
       }
-    }        
+    }
+    
+    return all;
   }
   
+//  /**
+//   * to support serializing addr in aux arrays for modifications below the mark,
+//   * scan to compute the starting address of each array that's below the mark
+//   * and build maps from Array FSs to aux array starting addresses
+//   */
+//  void scanAllFSsForBinaryDeltaSerialization(MarkerImpl mark, CommonSerDesSequential csds) {
+//      
+//  }
+//  
   /**
    * called in fs._id order to populate heaps from all FSs.
    * 
@@ -1422,7 +1445,8 @@ public class BinaryCasSerDes {
         
       case Slot_BooleanRef: {
           int baAddr = byteHeap .addBooleanArray(((BooleanArray)fs)._getTheArray());
-        heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+          heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+          byteAuxAddr2fsa.put(nextByteHeapAddrAfterMark + baAddr, fs);
 //        // hack to find first above-the-mark ref
 //        if (isMarkSet && baAddr < nextByteHeapAddrAfterMark) {
 //          nextByteHeapAddrAfterMark = baAddr;
@@ -1432,7 +1456,8 @@ public class BinaryCasSerDes {
         
       case Slot_ByteRef: {
           int baAddr = byteHeap .addByteArray   (((ByteArray   )fs)._getTheArray());
-        heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+          heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+          byteAuxAddr2fsa.put(nextByteHeapAddrAfterMark + baAddr, fs);
 //        // hack to find first above-the-mark ref
 //        if (isMarkSet && baAddr < nextByteHeapAddrAfterMark) {
 //          nextByteHeapAddrAfterMark = baAddr;
@@ -1442,17 +1467,20 @@ public class BinaryCasSerDes {
       case Slot_ShortRef: {
           int saAddr = shortHeap.addShortArray  (((ShortArray  )fs)._getTheArray());
           heap.heap[i] = nextShortHeapAddrAfterMark + saAddr;
+          shortAuxAddr2fsa.put(nextShortHeapAddrAfterMark + saAddr, fs);        
         }
         break;
         
       case Slot_LongRef: {
           int laAddr = longHeap .addLongArray   (((LongArray   )fs)._getTheArray());
           heap.heap[i] = nextLongHeapAddrAfterMark + laAddr;
+          longAuxAddr2fsa.put(nextLongHeapAddrAfterMark + laAddr, fs);        
         break;
         }
       case Slot_DoubleRef: {
           int laAddr = longHeap .addDoubleArray (((DoubleArray )fs)._getTheArray());
           heap.heap[i] = nextLongHeapAddrAfterMark + laAddr;
+          longAuxAddr2fsa.put(nextLongHeapAddrAfterMark + laAddr, fs);        
         break;
         }
       case Slot_HeapRef: 
@@ -1533,7 +1561,7 @@ public class BinaryCasSerDes {
     for (int heapIndex = startPos; heapIndex < heapsz; heapIndex += getFsSpaceReq(fs, type)) {
       type = tsi.getTypeForCode(heap.heap[heapIndex]);
       if (type == null) {
-        throw new CASRuntimeException(CASRuntimeException.deserialized_type_not_found, heap.heap[heapIndex]);
+        throw new CASRuntimeException(CASRuntimeException.deserialized_type_not_found, Integer.valueOf(heap.heap[heapIndex]));
       }
       if (type.isArray()) {
         final int len = heap.heap[heapIndex + arrayLengthFeatOffset];
@@ -1856,4 +1884,22 @@ public class BinaryCasSerDes {
     nextShortHeapAddrAfterMark  = 0;
     nextLongHeapAddrAfterMark   = 0;   
   }
+  
+  private void clearAuxAddr2fsa() {
+    byteAuxAddr2fsa.clear();
+    shortAuxAddr2fsa.clear();
+    longAuxAddr2fsa.clear();    
+  }
+  /**
+   * called by cas reset
+   */
+  public void clear() {
+    clearDeltaOffsets();
+    clearAuxAddr2fsa();
+    heap = null;
+    byteHeap = null;
+    shortHeap = null;
+    longHeap = null;
+    stringHeap = null;
+  }
 }
\ No newline at end of file

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java?rev=1759710&r1=1759709&r2=1759710&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java Wed Sep  7 21:33:09 2016
@@ -23,17 +23,20 @@ import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.BitSet;
+import java.util.List;
 
 import org.apache.uima.cas.CASRuntimeException;
 import org.apache.uima.cas.Marker;
-import org.apache.uima.cas.function.IntConsumer_withIOException;
+import org.apache.uima.cas.function.Consumer_T_withIOException;
 import org.apache.uima.cas.impl.CASImpl.FsChange;
-import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.cas.impl.SlotKinds.SlotKind;
 import org.apache.uima.internal.util.Misc;
 import org.apache.uima.internal.util.Obj2IntIdentityHashMap;
 import org.apache.uima.jcas.cas.BooleanArray;
 import org.apache.uima.jcas.cas.ByteArray;
+import org.apache.uima.jcas.cas.CommonArray;
 import org.apache.uima.jcas.cas.DoubleArray;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.cas.FloatArray;
@@ -84,6 +87,15 @@ public class CASSerializer implements Se
 
   static final long serialVersionUID = -7972011651957420295L;
 
+  static class AddrPlusValue {
+    final int addr;   // heap or aux heap addr
+    final long value;  // boolean, byte, short, long, double value
+    AddrPlusValue(int addr, long value) {
+      this.addr = addr;
+      this.value = value;
+    }
+  }
+  
   // The heap itself.
   public int[] heapArray = null;
 
@@ -140,7 +152,7 @@ public class CASSerializer implements Se
   public void addCAS(CASImpl cas, boolean addMetaData) {
     BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
     final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false);  // saves the csds in the cas
-    scanAllFSsForBinarySerialization(bcsd, null, csds); // populates the arrays
+    bcsd.scanAllFSsForBinarySerialization(null, csds);  // no mark
     this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr);  // must follow scanAll...
     
     if (addMetaData) {
@@ -261,7 +273,7 @@ public class CASSerializer implements Se
     final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
     
     final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false);  // saves the csds in the cas, used for delta
-    scanAllFSsForBinarySerialization(bcsd, null, csds); // populates the arrays
+    bcsd.scanAllFSsForBinarySerialization(null, csds);  // no mark
     
     try {
 
@@ -445,8 +457,47 @@ public class CASSerializer implements Se
 //    if (csds.getHeapEnd() == 0) {
 //      System.out.println("debug");
 //    }
-    scanAllFSsForBinarySerialization(bcsd, mark, csds); // populates the arrays
-
+    final Obj2IntIdentityHashMap<TOP> fs2auxOffset = new Obj2IntIdentityHashMap<TOP>(TOP.class, TOP._singleton);
+    
+    List<TOP> all = bcsd.scanAllFSsForBinarySerialization(mark, csds);
+    
+    int byteOffset = 1;
+    int shortOffset = 1;
+    int longOffset = 1;
+    
+    // scan all below mark and set up maps from aux array FSs to the offset to where the array starts in the modelled aux heap
+    for (TOP fs : all) {
+      if (trackingMark.isNew(fs)) {
+        break;
+      }
+      if (fs instanceof CommonArray) {
+        CommonArray ca = (CommonArray) fs;
+        SlotKind kind = fs._getTypeImpl().getComponentSlotKind();
+        switch (kind) {
+        case Slot_BooleanRef:
+        case Slot_ByteRef :
+          fs2auxOffset.put(fs, byteOffset);
+          byteOffset += ca.size();
+          break;
+        case Slot_ShortRef:
+          fs2auxOffset.put(fs,  shortOffset);
+          shortOffset += ca.size();
+          break;
+        case Slot_LongRef:
+        case Slot_DoubleRef:
+          fs2auxOffset.put(fs, longOffset);
+          longOffset += ca.size();
+          break;
+        default:
+        } // end of switch
+      } // end of if commonarray
+      else {  // fs has feature slots 
+        // model long and double refs which use up the long aux heap for 1 cell
+        TypeImpl ti = fs._getTypeImpl();
+        longOffset += ti.getNbrOfLongOrDoubleFeatures();
+      }
+    } // end of for
+    
     try {
       DataOutputStream dos = new DataOutputStream(ostream);
 
@@ -475,24 +526,14 @@ public class CASSerializer implements Se
       // we do this before the strings or aux arrays are written out, because this 
       // could make additions to those.
 
-      // addresses are in terms of modeled v2 arrays
-      IntVector chgMainHeapAddr = new IntVector(); 
-      IntVector chgMainHeapValues = new IntVector();
-      
-      IntVector chgByteAddr = new IntVector();
-      ByteHeap  chgByteValues = new ByteHeap();
-      
-      IntVector chgShortAddr = new IntVector();
-      ShortHeap chgShortValues = new ShortHeap();
-      
-      IntVector chgLongAddr  = new IntVector();
-      LongHeap  chgLongValues = new LongHeap();
+      // addresses are in terms of modeled v2 arrays, as absolute addr in the aux arrays, and values
+      List<AddrPlusValue> chgMainAvs = new ArrayList<>();
+      List<AddrPlusValue> chgByteAvs = new ArrayList<>();
+      List<AddrPlusValue> chgShortAvs = new ArrayList<>();
+      List<AddrPlusValue> chgLongAvs = new ArrayList<>();
 
-      scanModifications(bcsd, csds, cas.getModifiedFSList(), 
-          chgMainHeapAddr, chgMainHeapValues,
-          chgByteAddr,     chgByteValues,
-          chgShortAddr,    chgShortValues,
-          chgLongAddr,      chgLongValues); 
+      scanModifications(bcsd, csds, cas.getModifiedFSList(), fs2auxOffset,
+          chgMainAvs, chgByteAvs, chgShortAvs, chgLongAvs);
 
       // output the new strings
       StringHeapDeserializationHelper shdh = bcsd.stringHeap.serialize(1);
@@ -503,11 +544,11 @@ public class CASSerializer implements Se
       // this is output in a way that is the total number of slots changed == 
       //   the sum over all fsChanges of
       //     for each fsChange, the number of slots (heap-sited-array or feature) modified
-      final int modHeapSize = chgMainHeapAddr.size();
+      final int modHeapSize = chgMainAvs.size();
       dos.writeInt(modHeapSize);  //num modified
-      for (int i = 0; i < modHeapSize; i++) {
-        dos.writeInt(chgMainHeapAddr  .get(i));        
-        dos.writeInt(chgMainHeapValues.get(i));        
+      for (AddrPlusValue av : chgMainAvs) {
+        dos.writeInt(av.addr);        
+        dos.writeInt((int)av.value);        
       }
 
       // output the index FSs
@@ -547,24 +588,24 @@ public class CASSerializer implements Se
       }
       
       // 8bit heap modified cells
-      writeMods(chgByteAddr, dos, i -> dos.writeByte(chgByteValues.heap[i]));
+      writeMods(chgByteAvs, dos, av -> dos.writeByte((byte)av.value));
 
       // word alignment
-      align = (4 - (chgByteAddr.size() % 4)) % 4;
+      align = (4 - (chgByteAvs.size() % 4)) % 4;
       for (int i = 0; i < align; i++) {
         dos.writeByte(0);
       }
 
       // 16bit heap modified cells
-      writeMods(chgShortAddr, dos, i -> dos.writeShort(chgShortValues.heap[i]));
+      writeMods(chgShortAvs, dos, av -> dos.writeShort((short)av.value));
 
       // word alignment
-      if (chgShortAddr.size() % 2 != 0) {
+      if (chgShortAvs.size() % 2 != 0) {
         dos.writeShort(0);
       }
 
       // 64bit heap modified cells
-      writeMods(chgLongAddr, dos, i -> dos.writeLong(chgLongValues.heap[i]));      
+      writeMods(chgLongAvs, dos, av -> dos.writeLong(av.value));     
       
     } catch (IOException e) {
       throw new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION, e.getMessage());
@@ -572,30 +613,29 @@ public class CASSerializer implements Se
 
   }
   
-  private void writeMods(IntVector chgAddr, DataOutputStream dos, IntConsumer_withIOException writeValue) throws IOException  {
-    int size = chgAddr.size();
+  private void writeMods(
+      List<AddrPlusValue> avs, 
+      DataOutputStream dos, 
+      Consumer_T_withIOException<AddrPlusValue> writeValue) throws IOException  {
+    int size = avs.size();
     dos.writeInt(size);
-    for (int i = 0; i < size; i++) {
-      dos.writeInt(chgAddr.get(i));
+    for (AddrPlusValue av : avs) {
+      dos.writeInt(av.addr);
     }
-    for (int i = 1; i <= size; i++) {  // <= because start loop at 1
-      writeValue.accept(i);
-      // example
-      // dos.writeLong(chgLongValues.heap[i]);
+    for (AddrPlusValue av : avs) {   
+      writeValue.accept(av);
     }   
   }
   /**
    * The offset in the modeled heaps:
-   *   For aux arrays:
-   *     fs mapto mainHeapAddr, fetch root of aux array from slot + 2, add index
-   *   For main heap arrays:
-   *     fs mapto mainHeapAddr, slot + 2 + index    
    * @param index the 0-based index into the array
    * @param fs the feature structure representing the array
    * @return the addr into an aux array or main heap
    */
-  private static int convertArrayIndexToAuxHeapAddr(BinaryCasSerDes bcsd, int index, TOP fs, Obj2IntIdentityHashMap<TOP> fs2addr) {
-    return bcsd.heap.heap[fs2addr.get(fs) + 2] + index;
+  private static int convertArrayIndexToAuxHeapAddr(BinaryCasSerDes bcsd, int index, TOP fs, Obj2IntIdentityHashMap<TOP> fs2auxOffset) {
+    int offset = fs2auxOffset.get(fs);  // where this FS's array starts in the modelled aux heap
+    assert offset > 0;                  // aux heap offsets are assigned starting at 1
+    return offset + index;              // address of the modified element, not merely the array start
   }
 
   private static int convertArrayIndexToMainHeapAddr(int index, TOP fs, Obj2IntIdentityHashMap<TOP> fs2addr) {
@@ -609,18 +649,22 @@ public class CASSerializer implements Se
    * 
    * A prescan approach is needed in order to write the number of modifications preceeding the 
    *   write of the values (which unfortunately were written to the same stream in V2).
-   * @param bcsd -
-   * @param cas -
-   * @param chgMainHeapAdd -
-   * @param chgByteAddr -
-   * @param chgShortAddr -
-   * @param chgLongAddr -
+   * @param bcsd holds the model needed for v2 aux arrays
+   * @param csds supplies the fs2addr map used to compute main heap addresses
+   * @param fssModified the FsChange records (modified FSs) to scan
+   * @param fs2auxOffset map from an aux-array FS below the mark to where its array starts in the modelled aux heap
+   * @param chgMainAvs receives, in order, the changed addresses and corresponding values for the main heap
+   * @param chgByteAvs receives, in order, the changed addresses and corresponding values for the aux byte heap
+   * @param chgShortAvs receives, in order, the changed addresses and corresponding values for the aux short heap
+   * @param chgLongAvs receives, in order, the changed addresses and corresponding values for the aux long heap
    */
-  static void scanModifications(BinaryCasSerDes bcsd, CommonSerDesSequential csds, FsChange[] fssModified,
-      IntVector chgMainHeapAddr, IntVector chgMainHeapValue,
-      IntVector chgByteAddr,     ByteHeap  chgByteValue,
-      IntVector chgShortAddr,    ShortHeap chgShortValue,
-      IntVector chgLongAddr,     LongHeap  chgLongValue) {
+  static void scanModifications(BinaryCasSerDes bcsd, CommonSerDesSequential csds, FsChange[] fssModified, 
+      Obj2IntIdentityHashMap<TOP> fs2auxOffset,
+      List<AddrPlusValue> chgMainAvs, 
+      List<AddrPlusValue> chgByteAvs, 
+      List<AddrPlusValue> chgShortAvs, 
+      List<AddrPlusValue> chgLongAvs 
+      ) {
 
     // scan the sorted mods to precompute the various change items:
     //   changed main heap: addr and new slot value
@@ -635,67 +679,67 @@ public class CASSerializer implements Se
       if (fsChange.arrayUpdates != null) {
         switch(type.getComponentSlotKind()) {
         
-        case Slot_Boolean: 
+        case Slot_BooleanRef: 
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgByteAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
-            chgByteValue.addByte((((BooleanArray)fs).get(index) ? (byte)1 : (byte)0));
+            chgByteAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset), 
+                ((BooleanArray)fs).get(index) ? 1 : 0));
           }); 
           break;
         
-        case Slot_Byte:
+        case Slot_ByteRef:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgByteAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
-            chgByteValue.addByte(((ByteArray)fs).get(index));
+            chgByteAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset), 
+                ((ByteArray)fs).get(index)));
           }); 
           break;
 
-        case Slot_Short:
+        case Slot_ShortRef:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgShortAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
-            chgShortValue.addShort(((ShortArray)fs).get(index));
+            chgShortAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset), 
+                ((ShortArray)fs).get(index)));
           }); 
           break;
         
         case Slot_LongRef:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgLongAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
-            chgLongValue.addLong(((LongArray)fs).get(index));
+            chgLongAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset), 
+                ((LongArray)fs).get(index)));
           }); 
           break;
 
         case Slot_DoubleRef:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgLongAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
-            chgLongValue.addLong(CASImpl.double2long(((DoubleArray)fs).get(index)));
+            chgLongAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset), 
+                CASImpl.double2long(((DoubleArray)fs).get(index))));
           }); 
           break;
         
         // heap stored arrays
         case Slot_Int:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
-            chgMainHeapValue.add(((IntegerArray)fs).get(index));
+            chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr), 
+                ((IntegerArray)fs).get(index)));
           });
           break;
+
         case Slot_Float:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
-            chgMainHeapValue.add(CASImpl.float2int(((FloatArray)fs).get(index)));
+            chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr),
+                CASImpl.float2int(((FloatArray)fs).get(index))));
           });
           break;
+
         case Slot_StrRef:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
             int v = bcsd.nextStringHeapAddrAfterMark + bcsd.stringHeap.addString(((StringArray)fs).get(index));
-            chgMainHeapValue.add(v);
+            chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr), v));
           });
           break;
 
         case Slot_HeapRef:
           fsChange.arrayUpdates.forAllInts(index -> {
-            chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
             TOP tgtFs = ((FSArray)fs).get(index);
-            chgMainHeapValue.add(fs2addr.get(tgtFs));
+            chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr), fs2addr.get(tgtFs)));
           });
           break;
 
@@ -705,35 +749,36 @@ public class CASSerializer implements Se
         BitSet fm = fsChange.featuresModified;
         int offset = fm.nextSetBit(0);
         while (offset >= 0) {
-          chgMainHeapAddr.add(csds.fs2addr.get(fs) + offset + 1);  // skip over type code);
+          int addr = csds.fs2addr.get(fs) + offset + 1;  // skip over type code);
+          int value = 0;
+
           FeatureImpl feat = type.getFeatureImpls()[offset];
 
           switch (feat.getSlotKind()) {
-          case Slot_Boolean: chgMainHeapValue.add(fs._getBooleanValueNc(feat) ? 1 : 0); break;
+          case Slot_Boolean: value = fs._getBooleanValueNc(feat) ? 1 : 0; break;
             
-          case Slot_Byte:    chgMainHeapValue.add(fs._getByteValueNc(feat)); break;
-          case Slot_Short:   chgMainHeapValue.add(fs._getShortValueNc(feat)); break;
-          case Slot_Int:     chgMainHeapValue.add(fs._getIntValueNc(feat)); break;
-          case Slot_Float:   chgMainHeapValue.add(CASImpl.float2int(fs._getFloatValueNc(feat))); break;
+          case Slot_Byte:    value = fs._getByteValueNc(feat); break;
+          case Slot_Short:   value = fs._getShortValueNc(feat); break;
+          case Slot_Int:     value = fs._getIntValueNc(feat); break;
+          case Slot_Float:   value = CASImpl.float2int(fs._getFloatValueNc(feat)); break;
           case Slot_LongRef: {
-            int v = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(fs._getLongValueNc(feat));
-            chgMainHeapValue.add(v);
+            value = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(fs._getLongValueNc(feat));
             break;
           }
           case Slot_DoubleRef: {
-            int v = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(CASImpl.double2long(fs._getDoubleValueNc(feat)));
-            chgMainHeapValue.add(v);
+            value = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(CASImpl.double2long(fs._getDoubleValueNc(feat)));
             break;
           }
           case Slot_StrRef: {
-            int v = bcsd.nextStringHeapAddrAfterMark + bcsd.stringHeap.addString(fs._getStringValueNc(feat));
-            chgMainHeapValue.add(v);
+            value = bcsd.nextStringHeapAddrAfterMark + bcsd.stringHeap.addString(fs._getStringValueNc(feat));
             break;
           }
-          case Slot_HeapRef: chgMainHeapValue.add(fs2addr.get(fs._getFeatureValueNc(feat))); break;
+          case Slot_HeapRef: value = fs2addr.get(fs._getFeatureValueNc(feat)); break;
           default: Misc.internalError();
           } // end of switch
           
+          chgMainAvs.add(new AddrPlusValue(addr, value));
+          
           offset = fm.nextSetBit(offset + 1);
         } // loop over changed feature offsets         
       } // end of features-modified case
@@ -794,18 +839,7 @@ public class CASSerializer implements Se
   long[] getLongArray() {
     return this.longHeapArray;
   }
-  
-  /**
-   * For delta serialization, 
-   *   - scans all FSs to compute addr2fs and fs2addr tables
-   *   - scans new FSs to compute delta heap, aux heap, and strings to serialize
-   * @param bcsd -
-   * @param mark null or the mark if delta serialization
-   */
-  private void scanAllFSsForBinarySerialization(BinaryCasSerDes bcsd, MarkerImpl mark, CommonSerDesSequential csds) {
-    bcsd.scanAllFSsForBinarySerialization(mark, csds);
-  }
-  
+    
   private void copyHeapsToArrays(BinaryCasSerDes bcsd) {
     this.heapArray = bcsd.heap.toArray();
     this.byteHeapArray = bcsd.byteHeap.toArray();

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java?rev=1759710&r1=1759709&r2=1759710&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java Wed Sep  7 21:33:09 2016
@@ -19,12 +19,10 @@
 
 package org.apache.uima.cas.impl;
 
-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.uima.cas.function.Consumer_T_withIOException;
 import org.apache.uima.internal.util.Int2ObjHashMap;
 import org.apache.uima.internal.util.Misc;
 import org.apache.uima.internal.util.Obj2IntIdentityHashMap;
@@ -79,10 +77,9 @@ public class CommonSerDesSequential {
   /**
    * The FSs in this list are not necessarily sequential, but is in ascending (simulated heap) order,
    *   needed for V2 compatibility of serialized forms.
-   * This is populated from the main CAS's id-to-fs map, which is accessed once;
-   *   Subsequent accessing of that could return different lists due to an intervening Garbage Collection.
+   * This is populated either during deserialization or, for serialization, from the indexed + reachable FSs.
    *   
-   * Before accessing this, any pending items must be merged.  
+   * Before accessing this, any pending items must be merged (sorting done lazily)  
    */
   final private List<TOP> sortedFSs = new ArrayList<>();  // holds the FSs sorted by id
   
@@ -137,24 +134,39 @@ public class CommonSerDesSequential {
     heapEnd = 0;
   }
   
-  void setup(MarkerImpl mark, int fromAddr) {
+  /**
+   * Scan all indexed + reachable FSs, sorted, and
+   *   - create two maps from those to/from the int offsets in the simulated main heap
+   *   - add all the (filtered - above the mark) FSs to the sortedFSs
+   *   - set the heapEnd
+   * @param mark null or the mark
+   * @param fromAddr the starting address in the simulated heap; often 1, but sometimes the mark's next FS id
+   * @return all (not filtered) FSs sorted
+   */
+  List<TOP> setup(MarkerImpl mark, int fromAddr) {
     if (mark == null) {
       clear();
     }
     // local value as "final" to permit use in lambda below
-    final int[] nextAddr = {fromAddr};
+    int nextAddr = fromAddr;
     if (TRACE_SETUP) System.out.println("Cmn serDes sequential setup called by: " + Misc.getCaller());
 
-    List<TOP> allAboveMark = baseCas.walkReachablePlusFSsSorted(fs -> {
-          addFS1(fs, nextAddr[0]);
-          if (TRACE_SETUP) {
-            System.out.format("Cmn serDes sequential setup: add FS id: %,4d addr: %,5d  type: %s%n", fs._id, nextAddr[0], fs._getTypeImpl().getShortName());
-          }
-          nextAddr[0] += BinaryCasSerDes.getFsSpaceReq(fs, fs._getTypeImpl());  
-        }, mark, null, null);
+    List<TOP> all =  new AllFSs(baseCas).getAllFSsSorted();
+    List<TOP> filtered = CASImpl.filterAboveMark(all, mark);
+    for (TOP fs : filtered) {
+      addFS1(fs, nextAddr);   // doesn't update sortedFSs, that will be done below in batch
+      if (TRACE_SETUP) {
+          System.out.format("Cmn serDes sequential setup: add FS id: %,4d addr: %,5d  type: %s%n", 
+              Integer.valueOf(fs._id), 
+              Integer.valueOf(nextAddr), 
+              fs._getTypeImpl().getShortName());
+      }
+      nextAddr += BinaryCasSerDes.getFsSpaceReq(fs, fs._getTypeImpl());  
+    }
     
-    sortedFSs.addAll(allAboveMark);
-    heapEnd = nextAddr[0];
+    sortedFSs.addAll(filtered);
+    heapEnd = nextAddr;
+    return all;
 //    if (heapEnd == 0) {
 //      System.out.println("debug");
 //    }
@@ -172,6 +184,9 @@ public class CommonSerDesSequential {
 //    }
 //  }
 //  
+  /**
+   * @return the FSs above the mark if a mark is set, otherwise all FSs; sorted in either case
+   */
   List<TOP> getSortedFSs() {
     if (pending.size() != 0) {
       merge();