You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2016/05/25 14:59:52 UTC

svn commit: r1745499 [1/2] - in /uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima: cas/impl/ jcas/cas/

Author: schor
Date: Wed May 25 14:59:52 2016
New Revision: 1745499

URL: http://svn.apache.org/viewvc?rev=1745499&view=rev
Log:
[UIMA-4674] When setting the AnnotationBase sofa reference requires it, automatically set the sofa in an initial view that does not yet have one.  Move the check that disallows setting the sofa into the AnnotationBase class so it only runs there.

Added:
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/AllFSs.java
Modified:
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSIndexRepositoryImpl.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FeatureStructureImplC.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Id2FS.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/OutOfTypeSystemData.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XCASDeserializer.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XCASSerializer.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/jcas/cas/AnnotationBase.java

Added: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/AllFSs.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/AllFSs.java?rev=1745499&view=auto
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/AllFSs.java (added)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/AllFSs.java Wed May 25 14:59:52 2016
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.cas.impl;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.function.Predicate;
+import java.util.stream.Stream;
+
+import org.apache.uima.internal.util.PositiveIntSet;
+import org.apache.uima.internal.util.PositiveIntSet_impl;
+import org.apache.uima.jcas.cas.CommonArray;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.TOP;
+
+/**
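+ * Support for collecting all FSs in a CAS: the sofas, the FSs indexed in any view,
+ * and every FS reachable from those through FS-reference features or FSArray elements.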
+ */
+class AllFSs {
+  
+  final CASImpl cas;
+  final private MarkerImpl mark;
+  final private PositiveIntSet foundFSs = new PositiveIntSet_impl(4096, 1, 4096);
+  final private PositiveIntSet foundFSsBelowMark;
+  final private ArrayList<TOP> toBeScanned = new ArrayList<TOP>();
+  final private Predicate<TOP> includeFilter;
+  final private CasTypeSystemMapper typeMapper;
+  
+  AllFSs(CASImpl cas, MarkerImpl mark, Predicate<TOP> includeFilter, CasTypeSystemMapper typeMapper) {
+    this.cas = cas;
+    this.mark = mark;
+    foundFSsBelowMark = (mark != null) ? new PositiveIntSet_impl(1024, 1, 1024) : null;
+    this.includeFilter = includeFilter;
+    this.typeMapper = typeMapper;
+    
+    getAllIndexedFSsAllViews();
+  }
+  
+  PositiveIntSet getAllBelowMark() {
+    return foundFSsBelowMark;
+  }
+  
+  PositiveIntSet getAllNew() {
+    return foundFSs;
+  }
+  
+  ArrayList<TOP> getAllFSs() {
+    return toBeScanned;
+  }
+    
+  ArrayList<TOP> getAllFSsSorted() {
+    Collections.sort(toBeScanned);
+    return toBeScanned;
+  }
+  
+  void getAllIndexedFSsAllViews() {
+    cas.forAllSofas(sofa -> enqueueFS(sofa));
+    cas.forAllViews(view -> 
+       getFSsForView(view.indexRepository.<TOP>getAllIndexedFS(cas.getTypeSystemImpl().topType).asStream()));
+    for (int i = 0; i < toBeScanned.size(); i++) {
+      enqueueFeatures(toBeScanned.get(i));
+    }
+  }
+  
+  private void getFSsForView(Stream<TOP> fss) {
+    fss.forEach(fs -> enqueueFS(fs));
+  }
+  
+  private void enqueueFS(TOP fs) {
+    if (null == fs || (includeFilter != null && !includeFilter.test(fs))) {
+      return;
+    }
+
+    final int id = fs._id;
+    
+    if (mark == null || mark.isNew(fs)) { // separately track items below the line
+      if (!foundFSs.contains(id)) {
+        foundFSs.add(id);
+        toBeScanned.add(fs);
+      }
+    } else {
+      if (!foundFSsBelowMark.contains(id)) {
+        foundFSsBelowMark.add(id);
+        toBeScanned.add(fs);
+      }
+    }
+  }
+    
+  private void enqueueFeatures(TOP fs) {
+    if (fs instanceof FSArray) {
+      for (TOP item : ((FSArray)fs)._getTheArray()) {
+        enqueueFS(item);
+      }
+      return;
+    }
+    
+    // not an FS Array
+    if (fs instanceof CommonArray) {
+      return;  // no refs
+    }
+  
+    final TypeImpl srcType = fs._getTypeImpl();
+    for (FeatureImpl srcFeat : srcType.getFeatureImpls()) {
+      if (typeMapper != null) {
+        FeatureImpl tgtFeat = typeMapper.getTgtFeature(srcType, srcFeat);
+        if (tgtFeat == null) {
+          continue;  // skip enqueue if not in target
+        }
+      } 
+      if (srcFeat.getRangeImpl().isRefType) {
+        enqueueFS(fs._getFeatureValueNc(srcFeat));
+      }
+    }   
+  }
+}

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java Wed May 25 14:59:52 2016
@@ -29,6 +29,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.function.Consumer;
+import java.util.function.IntConsumer;
 import java.util.function.IntFunction;
 
 import org.apache.uima.cas.CAS;
@@ -216,7 +217,9 @@ public class BinaryCasSerDes {
    */
   void reinit(int[] heapMetadata, int[] heapArray, String[] stringTable, int[] fsIndex,
       byte[] byteHeapArray, short[] shortHeapArray, long[] longHeapArray) {
-    CommonSerDesSequential csds = new CommonSerDesSequential(baseCas);
+    CommonSerDesSequential csds = new CommonSerDesSequential(baseCas);  // for non Delta case, not held on to
+            // compare with compress form 4, which does cas.getCsds() or cas.newCsds() which saves it in cas.svd
+    csds.setup(null, 1);
     heap = new Heap();
     byteHeap = new ByteHeap();
     shortHeap = new ShortHeap();
@@ -506,7 +509,7 @@ public class BinaryCasSerDes {
     
     final DataInputStream dis = r.dis;
     
-    final CommonSerDesSequential csds;
+    final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(baseCas, delta);
     
     if (delta) {
       if (nextHeapAddrAfterMark == 0 ||
@@ -514,7 +517,6 @@ public class BinaryCasSerDes {
           heap.getCellsUsed() <=1) {
         Misc.internalError();  // can't deserialize without a previous binary serialization for this CAS
       }
-      csds = baseCas.getCsds();
     } else {
       if (heap == null) heap = new Heap(); else heap.reset();
       if (byteHeap == null) byteHeap = new ByteHeap(); else byteHeap.reset();
@@ -522,7 +524,6 @@ public class BinaryCasSerDes {
       if (longHeap == null) longHeap = new LongHeap(); else longHeap.reset();
       if (stringHeap == null) stringHeap = new StringHeap(); else stringHeap.reset();
       clearDeltaOffsets();
-      csds = baseCas.newCsds();
     } 
     
     try {
@@ -1094,14 +1095,13 @@ public class BinaryCasSerDes {
       }
     }
   }
-  
+    
   void addIdsToIntVector(Set<TOP> fss, IntVector v, Obj2IntIdentityHashMap<TOP> fs2addr) {
     v.add(fss.size());
     for (TOP fs : fss) {
       v.add(fs2addr.get(fs));
     }
   }
-
   
   //Delta IndexedFSs format:
   // number of views
@@ -1231,14 +1231,11 @@ public class BinaryCasSerDes {
     final boolean isMarkSet = mark != null;
 
     if (isMarkSet) {
-      csds.setup(mark.getNextFSId(), csds.getHeapEnd()); 
-    } else {
-      csds.clear();
-      csds.setup();
-    }
+      csds.setup(mark, csds.getHeapEnd());   // add new stuff to existing csds
+    }  // otherwise, it's set up using null, 1 as the arguments
         
     // For delta, these heaps will start at 1, and only hold new items
-    heap = new Heap();
+    heap = new Heap(csds.getHeapEnd());
     byteHeap = new ByteHeap();
     shortHeap = new ShortHeap();
     longHeap = new LongHeap();
@@ -1248,7 +1245,8 @@ public class BinaryCasSerDes {
       clearDeltaOffsets();  // set nextXXheapAfterMark to 0;
     }
 
-    for (TOP fs : csds.getSortedFSs()) {
+    List<TOP> itemsToExtract = isMarkSet ? CASImpl.filterAboveMark(csds.getSortedFSs(), mark) : csds.getSortedFSs();
+    for (TOP fs : itemsToExtract) {
       if (!isMarkSet || mark.isNew(fs)) {
         // skip extraction for FSs below the mark. 
         //   - updated slots will update aux heaps when delta mods are processed
@@ -1511,7 +1509,9 @@ public class BinaryCasSerDes {
         } else {
           fs = initialView.createFS(type);
         }
-        csds.addFS(fs, heapIndex);
+        if (!isSofa) { // if it was a sofa, other code added or pended it
+          csds.addFS(fs, heapIndex);
+        }
         
         for (final FeatureImpl feat : type.getFeatureImpls()) {
           SlotKind slotKind = feat.getSlotKind();

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java Wed May 25 14:59:52 2016
@@ -119,10 +119,10 @@ import org.apache.uima.util.impl.Seriali
  *   create appropriate unzip data input streams for these
  *   
  * Properties of Form 4:
- *   1) Indexes ** not used ** for determining what gets serialized, instead
- *      the "heap" is walked and all FSs found there are serialized
+ *   1) (Change from V2) Indexes are used to determine what gets serialized, because there's no "heap" to walk.
  *      
  *   2) The number used for references to FSs is a sequentially incrementing one, starting at 1
+ *       This allows better compression.
  *   
  *   
  */
@@ -382,7 +382,9 @@ public class BinaryCasSerDes4 implements
     final private DataOutputStream[] dosZipSources = new DataOutputStream[NBR_SLOT_KIND_ZIP_STREAMS];      // lazily created, indexed by SlotKind.i
 
 //    final private int[] heap;           // main heap
+    /** start of heap, in v2 pseudo-addr coordinates */
     private int heapStart;
+    /** end of heap, in v2 pseudo-addr coordinates = addr of last + length of last */
     private int heapEnd;
 //    final private LongHeap longHeapObj;
 //    final private ShortHeap shortHeapObj;
@@ -437,7 +439,11 @@ public class BinaryCasSerDes4 implements
     
     /**
      * convert between FSs and "sequential" numbers
+     *   This is for compression efficiency and also is needed for backwards compatibility with v2 serialization forms, where
+     *     index information was written using "sequential" numbers
      * Note: This may be identity map, but may not in the case for V3 where some FSs are GC'd
+     * 
+     * Contrast with fs2addr and addr2fs in csds - these use the pseudo v2 addresses as the int
      */    
     private final Obj2IntIdentityHashMap<TOP> fs2seq = new Obj2IntIdentityHashMap<TOP>(TOP.class, TOP.singleton);
     private final Int2ObjHashMap<TOP> seq2fs = new Int2ObjHashMap<>(TOP.class);
@@ -458,7 +464,7 @@ public class BinaryCasSerDes4 implements
       this.baseCas = cas.getBaseCAS();
       this.bcsd = cas.getBinaryCasSerDes();
       this.isDelta = (mark != null);
-      this.csds = isDelta ? baseCas.getCsds() : baseCas.newCsds();
+      this.csds = getCsds(baseCas, isDelta);
 //      this.ccs = new CommonCompressedSerialization(
 //          new CommonSerDesTypeMap(cas.getTypeSystemImpl(),  cas.getTypeSystemImpl()), // no type mapping
 //          mark);
@@ -544,13 +550,15 @@ public class BinaryCasSerDes4 implements
        *   addr2fs - address to feature structure
        *   sortedFSs - sorted by addr (sorted by id)
        *******************************************************************************/
+      final int origHeapEnd = (null == csds) ? 0 : csds.getHeapEnd();
       if (isDelta) {
-        csds.setup(mark.getNextFSId(), csds.getHeapEnd());
-      } else {
-        csds.clear();
-        csds.setup();
-      }
+        csds.setup(mark, origHeapEnd);  // add additional above the line items to csds
+      } // otherwise was initialized when initially set up 
       
+      /**
+       * prepare fs <--> seq maps
+       * done for the entire CAS, even when a mark is set
+       */
       fs2seq.clear();
       seq2fs.clear();
       int seq = 1;  // origin 1
@@ -560,21 +568,7 @@ public class BinaryCasSerDes4 implements
         seq2fs.put(seq++, fs);
       }
       
-      final List<TOP> newSortedFSs; // a sublist for delta cas of just the new ones
-      // may be null if there are no FSs to serialize
-      TOP firstFS = isDelta ? baseCas.getFsFromId(mark.getNextFSId()) : (localSortedFSs.size() == 0) ? null : localSortedFSs.get(0);
-      if (firstFS != null) {
-        if (isDelta) {
-          int i = Collections.binarySearch(localSortedFSs, firstFS);  // depends on increasing id's for compare order
-          assert(i >= 0);
-          newSortedFSs = localSortedFSs.subList(i, localSortedFSs.size());
-        } else {
-          newSortedFSs = localSortedFSs;
-        }
-      } else {
-        newSortedFSs = Collections.emptyList();
-      }
-       
+      List<TOP> newSortedFSs = CASImpl.filterAboveMark(csds.getSortedFSs(), mark);  // returns all if mark not set            
             
       /**************************
        * Strings
@@ -603,14 +597,16 @@ public class BinaryCasSerDes4 implements
        ***************************/
       heapEnd = csds.getHeapEnd();
       
-      
-      if (isDelta) {
-        // edge case - delta serializing with no new fs
-        heapStart = (null == firstFS) ? heapEnd : csds.fs2addr.get(firstFS);
-      } else {
-        heapStart = 0; // not 1, in order to match v2 semantics
-                       // is switched to 1 later
-      }
+      heapStart = isDelta ? origHeapEnd : 0;
+//      
+//      
+//      if (isDelta) {
+//        // edge case - delta serializing with no new fs
+//        heapStart = (null == firstFS) ? heapEnd : csds.fs2addr.get(firstFS);
+//      } else {
+//        heapStart = 0; // not 1, in order to match v2 semantics
+//                       // is switched to 1 later
+//      }
       
 //      if (isDelta) {
 //        // debug
@@ -772,27 +768,28 @@ public class BinaryCasSerDes4 implements
 
     private int compressFsxPart(int[] fsIndexes, int fsNdxStart) throws IOException {
       int ix = fsNdxStart;
-      int nbrEntries = fsIndexes[ix++];
-      int end = ix + nbrEntries;
+      final int nbrEntries = fsIndexes[ix++];
+      final int end = ix + nbrEntries;
       writeVnumber(fsIndexes_dos, nbrEntries);  // number of entries
       if (doMeasurement) {
         sm.statDetails[typeCode_i].incr(DataIO.lengthVnumber(nbrEntries));
       }
       
       final int[] ia = new int[nbrEntries];
-      System.arraycopy(fsIndexes, ix, ia, 0, nbrEntries);
+      for (int i = ix, t = 0; i < end; i++, t++) {
+        ia[t] = fs2seq(csds.addr2fs.get(fsIndexes[i]));  // convert "addr" to "seq" offset
+      }
+//      System.arraycopy(fsIndexes, ix, ia, 0, nbrEntries);
       Arrays.sort(ia);
      
       int prev = 0;           
       for (int i = 0; i < ia.length; i++) {
         int v = ia[i];
-        v = fs2seq(csds.addr2fs.get(v));  // v2 wrote the sequential number
         writeVnumber(fsIndexes_dos, v - prev);
         if (doMeasurement) {
           sm.statDetails[fsIndexes_i].incr(DataIO.lengthVnumber(v - prev));
         }
-        prev = v;
-        
+        prev = v;      
       }
       return end;
     } 
@@ -1634,8 +1631,8 @@ public class BinaryCasSerDes4 implements
     Deserializer(CASImpl cas, DataInput deserIn, boolean isDelta) throws IOException {
       this.baseCas = cas.getBaseCAS();
       this.ivCas = baseCas.getInitialView();
-      this.bcsd = cas.getBinaryCasSerDes();
-      this.csds = isDelta ? cas.getCsds() : cas.newCsds();
+      this.bcsd = cas.getBinaryCasSerDes();     
+      this.csds = getCsds(baseCas, isDelta);
       this.deserIn = deserIn;
       this.isDelta = isDelta;
       
@@ -1700,15 +1697,10 @@ public class BinaryCasSerDes4 implements
        * Prepare to walk main heap
        * The csds must be either empty (for receiving non- delta) 
        * or the same as when the CAS was previous sent out (for receiving delta)
-       *   Can't recompute it for delta case because a GC may have eliminated some of the items.
        ***************************/
       
-      if (!isDelta) {
-        csds.setup();
-      }
-      
       int seq = 1;
-      for (TOP fs : csds.getSortedFSs()) {
+      for (TOP fs : csds.getSortedFSs()) {  // only non-empty if delta; and then it's from prev serialization
 //        fs2seq.put(fs, seq);
         seq2fs.put(seq++, fs);
       }
@@ -1754,7 +1746,7 @@ public class BinaryCasSerDes4 implements
       for (int iHeap = heapStart; iHeap < heapEnd; iHeap += type.getFsSpaceReq(arraySize)) {
         
         final int typeCode = readVnumber(typeCode_dis);
-        final int adjTypeCode = typeCode + ((isBeforeV3 && typeCode >= TypeSystemImpl.javaObjectTypeCode) ? 2 : 0);
+        final int adjTypeCode = typeCode + ((isBeforeV3 && typeCode >= TypeSystemConstants.javaObjectTypeCode) ? 2 : 0);
         type = ts.getTypeForCode(adjTypeCode);
         
         prevFs = prevFsByType[adjTypeCode]; // could be null;
@@ -1797,7 +1789,7 @@ public class BinaryCasSerDes4 implements
             currentFs = baseCas.createSofa(sofaNum, sofaName, null);  
           } else {
             CASImpl view = (CASImpl) baseCas.getView(sofaRef);
-            if (type.getCode() == TypeSystemImpl.docTypeCode) {
+            if (type.getCode() == TypeSystemConstants.docTypeCode) {
               currentFs = view.getDocumentAnnotation();  // creates the document annotation if it doesn't exist
               // we could remove this from the indexes until deserialization is over, but then, other calls to getDocumentAnnotation
               // would end up creating additional instances
@@ -1805,7 +1797,7 @@ public class BinaryCasSerDes4 implements
               currentFs = view.createFS(type);
             }
           }
-          if (type.getCode() == TypeSystemImpl.docTypeCode) { 
+          if (type.getCode() == TypeSystemConstants.docTypeCode) { 
             boolean wasRemoved = baseCas.checkForInvalidFeatureSetting(currentFs, baseCas.getAddbackSingle());
             for (Runnable r : singleFsDefer) {
               r.run();
@@ -2070,8 +2062,8 @@ public class BinaryCasSerDes4 implements
       }
       
       bcsd.reinitIndexedFSs(fsIndexes.getArray(), isDelta,
-          i -> 
-             csds.addr2fs.get(i));   // written on separate line for Eclipse breakpoint control
+          i ->  
+               seq2fs.get(i)); // written on separate line for Eclipse breakpoint control
     }
 
     /** 
@@ -2126,7 +2118,7 @@ public class BinaryCasSerDes4 implements
       for (int i = 0; i < nbrEntries; i++) {
         int v = readVnumber(fsIndexes_dis) + prev;
         prev = v;
-        v = csds.fs2addr.get(seq2fs(v));
+//        v = csds.fs2addr.get(seq2fs(v));  // v is the seq form of a ref (incr by 1)
         fsIndexes.add(v);
       }
     } 
@@ -2552,9 +2544,6 @@ public class BinaryCasSerDes4 implements
             vPrevModHeapRef = v;
             
             final TOP ref_fs = seq2fs(v);
-//            if (ref_fs == null) {
-//              System.out.println("debug addr: " + v);
-//            }
             assert(ref_fs != null);
             if (isArray) {
               ((FSArray)fs).set(offsetInFs - 2, ref_fs);
@@ -2932,6 +2921,19 @@ public class BinaryCasSerDes4 implements
     throw new RuntimeException(String.format("Invalid class passed to method, class was %s", f.getClass().getName()));
   }
   
+  static CommonSerDesSequential getCsds(CASImpl cas, boolean isDelta) {
+    CommonSerDesSequential tmpCsds = isDelta ? cas.getCsds() : cas.newCsds();
+    if (null == tmpCsds || tmpCsds.isEmpty() ) {
+      // is delta but no csds
+      tmpCsds = cas.newCsds();
+      tmpCsds.setup(null, 1);
+    } else if (!isDelta) {
+      tmpCsds.setup(null, 1); // non delta case, starting with new one  
+    }
+    return tmpCsds;
+  }
+  
+
 //  public String printCasInfo(CASImpl cas) {
 //    int heapsz= cas.getHeap().getNextId() * 4;
 //    StringHeapDeserializationHelper shdh = cas.getStringHeap().serialize();
@@ -3135,13 +3137,13 @@ public class BinaryCasSerDes4 implements
   
   public static void dumpCas(CASImpl cas) {
     CommonSerDesSequential csds = new CommonSerDesSequential(cas);
-    csds.setup();
+    csds.setup(null, 1);
     
     for (TOP fs : csds.getSortedFSs()) {
-//      System.out.format("debug heapAddr: %,d type: %s%n", csds.fs2addr.get(fs), fs._getTypeImpl().getShortName());
-      if (csds.fs2addr.get(fs) == 439) {
-        System.out.format("debug, fs: %s%n", fs);
-      }
+      System.out.format("debug heapAddr: %,d type: %s%n", csds.fs2addr.get(fs), fs._getTypeImpl().getShortName());
+//      if (csds.fs2addr.get(fs) == 439) {
+//        System.out.format("debug, fs: %s%n", fs);
+//      }
     }
     System.out.format("debug heapend: %,d%n", csds.getHeapEnd());
   }

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java Wed May 25 14:59:52 2016
@@ -66,6 +66,7 @@ import org.apache.uima.internal.util.Int
 import org.apache.uima.internal.util.IntVector;
 import org.apache.uima.internal.util.Misc;
 import org.apache.uima.internal.util.Pair;
+import org.apache.uima.internal.util.PositiveIntSet;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.BooleanArray;
 import org.apache.uima.jcas.cas.ByteArray;
@@ -249,9 +250,9 @@ public class BinaryCasSerDes6 implements
      *   - Delta serialization (uses reuse info saved during initial deserialization)
      *   - Delta deserialization 
      *   if Null, recomputed when needed
-     * BitSet used to test if fsRef needs to be serialized   
+     * foundFSs used to test if fsRef needs to be serialized   
      */
-    final private BitSet foundFSsBitset;
+    final private PositiveIntSet foundFSs;
     final private List<TOP> fssToSerialize; // ordered list of FSs found in indexes or linked from other found FSs
     
     /**
@@ -262,17 +263,17 @@ public class BinaryCasSerDes6 implements
     final private CasSeqAddrMaps fsStartIndexes;
     
     private ReuseInfo(
-        BitSet foundFSsBitset,
+        PositiveIntSet foundFSs,
         List<TOP> fssToSerialize, 
         CasSeqAddrMaps fsStartIndexes) {
-      this.foundFSsBitset = foundFSsBitset;
+      this.foundFSs = foundFSs;
       this.fssToSerialize = fssToSerialize;
       this.fsStartIndexes = fsStartIndexes;
     }
   }
   
   public ReuseInfo getReuseInfo() {
-    return new ReuseInfo(foundFSsBitset, fssToSerialize, fsStartIndexes);
+    return new ReuseInfo(foundFSs, fssToSerialize, fsStartIndexes);
   }
     
   /**
@@ -367,13 +368,13 @@ public class BinaryCasSerDes6 implements
    * ordered set of FSs found in indexes or linked from other found FSs.
    * used to control loops/recursion when locating things
    */
-  private BitSet foundFSsBitset;
+  private PositiveIntSet foundFSs;
   
   /**
    * ordered set of FSs found in indexes or linked from other found FSs, which are below the mark.
    * used to control loops/recursion when locating things
    */
-  private BitSet foundFSsBelowMarkBitset;
+  private PositiveIntSet foundFSsBelowMark;
   
   /**
    * FSs being serialized. For delta, just the deltas above the delta line.
@@ -540,12 +541,12 @@ public class BinaryCasSerDes6 implements
     this.compressStrategy = compressStrategy;
     reuseInfoProvided = (rfs != null);
     if (reuseInfoProvided) {
-      foundFSsBitset = rfs.foundFSsBitset;  // broken for serialization - not reused
+      foundFSs = rfs.foundFSs;  // broken for serialization - not reused
       fssToSerialize = rfs.fssToSerialize;  // broken for serialization - not reused
       // TODO figure out why there's a copy for next
       fsStartIndexes = rfs.fsStartIndexes.copy();
     } else {
-      foundFSsBitset = null;
+      foundFSs = null;
       fssToSerialize = null;
       fsStartIndexes = new CasSeqAddrMaps();
     }
@@ -1375,7 +1376,7 @@ public class BinaryCasSerDes6 implements
 
         // probably don't need this test, because change logging is done when a mark is set, 
         //   only for items below the line
-        if (!foundFSsBelowMarkBitset.get(fs._id)) {
+        if (!foundFSsBelowMark.contains(fs._id)) {
 //          System.out.format("  skipping heap addr %,d%n", currentFsId);
           continue;        
         }
@@ -1421,8 +1422,8 @@ public class BinaryCasSerDes6 implements
         // perhaps part of this if test is not needed:
         //   the id is probably guaranteed to be below the split point
         //   because logging doesn't happen unless a change is below the mark
-        if ((id >= splitPoint && !foundFSsBitset.get(id)) ||
-            (id < splitPoint && !foundFSsBelowMarkBitset.get(id))) {
+        if ((id >= splitPoint && !foundFSs.contains(id)) ||
+            (id < splitPoint && !foundFSsBelowMark.contains(id))) {
           // although it was modified, it isn't going to be serialized because
           //   it isn't indexed or referenced
           continue;
@@ -1769,7 +1770,7 @@ public class BinaryCasSerDes6 implements
             currentFs = cas.createSofa(sofaNum, sofaName, sofaMimeType);  
           } else {
             CASImpl view = (CASImpl) cas.getView(sofaRef);
-            if (srcType.getCode() == TypeSystemImpl.docTypeCode) {
+            if (srcType.getCode() == TypeSystemConstants.docTypeCode) {
               currentFs = view.getDocumentAnnotation();  // creates the document annotation if it doesn't exist
               // we could remove this from the indexes until deserialization is over, but then, other calls to getDocumentAnnotation
               // would end up creating additional instances
@@ -1777,7 +1778,7 @@ public class BinaryCasSerDes6 implements
               currentFs = view.createFS(srcType);
             }
           }
-          if (srcType.getCode() == TypeSystemImpl.docTypeCode) { 
+          if (srcType.getCode() == TypeSystemConstants.docTypeCode) { 
             boolean wasRemoved = cas.removeFromCorruptableIndexAnyView(currentFs, cas.getAddbackSingle());
             for (Runnable r : singleFsDefer) {
               r.run();
@@ -1857,7 +1858,7 @@ public class BinaryCasSerDes6 implements
     
     case Slot_Int: {
       IntegerArray ia = (IntegerArray)fs;
-      int prev = getPrevIntValue(TypeSystemImpl.intArrayTypeCode, 0);
+      int prev = getPrevIntValue(TypeSystemConstants.intArrayTypeCode, 0);
       for (int i = 0; i < length; i++) {
         int v = readDiff(Slot_Int, prev);
         prev = v;
@@ -1886,13 +1887,16 @@ public class BinaryCasSerDes6 implements
       break;
     }  
     
-      case Slot_DoubleRef: 
+      case Slot_DoubleRef:
+//        if (length == 0) {
+//          System.out.println("debug deser Double Array len 0, fsId = " + fs._id);
+//        }
         readIntoDoubleArray(((DoubleArray)fs)._getTheArray(), Slot_DoubleRef, length, storeIt); 
         break;
       
       case Slot_HeapRef: {
         FSArray fsa = (FSArray)fs;
-        int prev = getPrevIntValue(TypeSystemImpl.fsArrayTypeCode, 0);
+        int prev = getPrevIntValue(TypeSystemConstants.fsArrayTypeCode, 0);
         for (int i = 0; i < length; i++) {
           final int v = readDiff(Slot_HeapRef, prev);
           prev = v;
@@ -1980,8 +1984,8 @@ public class BinaryCasSerDes6 implements
           // When the setting is done for this one feature structure (now or at the end of deserializing features for it)
           //   two cases: the ref'd value is known, or not.
           //     - if not known, a fixup is added to
-          if (tgtType.getCode() == TypeSystemImpl.sofaTypeCode) {
-            if (tgtFeat.getCode() == TypeSystemImpl.sofaArrayFeatCode) { // sofaArrayFeatCode is the ref to array for sofa data
+          if (tgtType.getCode() == TypeSystemConstants.sofaTypeCode) {
+            if (tgtFeat.getCode() == TypeSystemConstants.sofaArrayFeatCode) { // sofaArrayFeatCode is the ref to array for sofa data
               Sofa sofa = (Sofa) lfs;
               maybeStoreOrDefer_slotFixups(vh, ref_fs -> sofa.setLocalSofaData(ref_fs));
             }
@@ -2008,7 +2012,7 @@ public class BinaryCasSerDes6 implements
         break;  // null is the default value, no need to set it
       }
       if (storeIt) {
-        if (tgtType.getCode() == TypeSystemImpl.sofaTypeCode) {
+        if (tgtType.getCode() == TypeSystemConstants.sofaTypeCode) {
           if (srcFeat == srcTs.sofaId) {
             sofaName = vString; 
             break;
@@ -2381,7 +2385,8 @@ public class BinaryCasSerDes6 implements
     private short vPrevModShort = 0;
     private long vPrevModLong = 0;
     private int iHeap;
-    private int[] tgtF2srcF;
+    /** a map from target feature offsets to the corresponding source features */
+    private FeatureImpl[] tgtF2srcF;
     
     // next for managing index removes / readds
     private FSsTobeAddedbackSingle addbackSingle;
@@ -2435,10 +2440,9 @@ public class BinaryCasSerDes6 implements
         }
         
         TypeImpl srcType = fs._getTypeImpl();
-//        typeInfo = ts.getTypeInfo(tCode);
-//        if (isTypeMapping) {
-//          tgtF2srcF = typeMapper.getTgtFeatOffsets2Src(tCode);
-//        }
+        if (isTypeMapping) {
+          tgtF2srcF = typeMapper.getSrcFeatures(typeMapper.mapTypeSrc2Tgt(srcType));
+        }
         
         final int numberOfModsInThisFs = readVnumber(fsIndexes_dis); 
 
@@ -2538,7 +2542,9 @@ public class BinaryCasSerDes6 implements
         iPrevTgtOffsetInFs = tgtOffsetInFs;
         
         // srcOffsetInFs is either array index or feature offset
-        final int srcOffsetInFs = (!isArray && isTypeMapping) ? tgtF2srcF[tgtOffsetInFs] : tgtOffsetInFs;
+        final int srcOffsetInFs = (!isArray && isTypeMapping) 
+                                    ? tgtF2srcF[tgtOffsetInFs].getOffset() 
+                                    : tgtOffsetInFs;
         
           // srcOffset must be >= 0 because if type mapping, and delta cas being deserialized,
           //   all of the target features would have been merged into the source ones.
@@ -2663,75 +2669,55 @@ public class BinaryCasSerDes6 implements
    */
   private void processIndexedFeatureStructures(final CASImpl cas, boolean isWrite) throws IOException {
     if (!isWrite) {
-      foundFSsBitset = new BitSet(4096);  // is 64 long words  
-      foundFSsBelowMarkBitset = isSerializingDelta ? new BitSet(1024) : null;
-      toBeScanned.clear();
-    } else {
+      AllFSs allFSs = new AllFSs(cas, mark, isTypeMapping ? fs -> isTypeInTgt(fs) : null, 
+                                            isTypeMapping ? typeMapper            : null);
+      fssToSerialize = CASImpl.filterAboveMark(allFSs.getAllFSsSorted(), mark);
+      foundFSs = allFSs.getAllNew();
+      foundFSsBelowMark = allFSs.getAllBelowMark();
+      return;
+    }
+    
+    
       
 //      if (doMeasurements) {
 //        sm.statDetails[fsIndexes_i].original = fsIndexes.length * 4 + 1;      
 //      }
-      writeVnumber(control_i, cas.getNumberOfViews());
-      writeVnumber(control_i, cas.getNumberOfSofas());
-      if (doMeasurements) {
-        sm.statDetails[fsIndexes_i].incr(1); // an approximation - probably correct
-        sm.statDetails[fsIndexes_i].incr(1);
-      }
+    writeVnumber(control_i, cas.getNumberOfViews());
+    writeVnumber(control_i, cas.getNumberOfSofas());
+    if (doMeasurements) {
+      sm.statDetails[fsIndexes_i].incr(1); // an approximation - probably correct
+      sm.statDetails[fsIndexes_i].incr(1);
     }
 
     // write or enqueue the sofas
     final FSIterator<Sofa> it = cas.getSofaIterator();
     while (it.hasNext()) {
       Sofa sofa = it.nextNvc();
-      if (isWrite) {
-        // for delta only write new sofas
-        if (!isSerializingDelta || mark.isNew(sofa)) {
-          // never returns -1, because this is for the sofa fs, and that's never filtered
-          final int v = getTgtSeqFromSrcFS(sofa);
-          writeVnumber(control_i, v);    // version 1
-           
-          if (doMeasurements) {
-            sm.statDetails[fsIndexes_i].incr(DataIO.lengthVnumber(v));
-          }
+      // for delta only write new sofas
+      if (!isSerializingDelta || mark.isNew(sofa)) {
+        // never returns -1, because this is for the sofa fs, and that's never filtered
+        final int v = getTgtSeqFromSrcFS(sofa);
+        writeVnumber(control_i, v);    // version 1
+         
+        if (doMeasurements) {
+          sm.statDetails[fsIndexes_i].incr(DataIO.lengthVnumber(v));
         }
-      } else {
-        enqueueFS(sofa);  //sofa fs's always in the type system
       }
     }
     TypeImpl topType = (TypeImpl) cas.getTypeSystemImpl().getTopType();
 
     // write (id's only, for index info) and/or enqueue indexed FSs, either all, or (for delta writes) the added/deleted/reindexed ones
-    if (isWrite) {
-      cas.forAllViews(view -> {
-        processFSsForView(true, true,   // is enqueue, is write
-          isSerializingDelta 
-            ? view.indexRepository.getAddedFSs().stream()
-            : view.indexRepository.<TOP>getAllIndexedFS(topType).asStream());
-        if (isSerializingDelta) {
-          // for write/delta, write out (but don't enqueue) the deleted/reindexed FSs
-          processFSsForView(false, true, view.indexRepository.getDeletedFSs().stream());
-          processFSsForView(false, true, view.indexRepository.getReindexedFSs().stream());
-        }
-      });   
-    } else { // is not write
-      // debug
-//      cas.forAllViews(view -> System.out.println("View name is " + view.getViewName()));
-//      cas.getInitialView().getIndexRepository().getAllIndexedFS(topType);
-      cas.forAllViews(view -> 
-        processFSsForView  (true, false, view.indexRepository.<TOP>getAllIndexedFS(topType).asStream()));
-      processRefedFSs();
-      // convert representation from bitset to list<TOP>
-      final int fsslen = foundFSsBitset.cardinality();
-      fssToSerialize = new ArrayList<>(fsslen);
-      final int len = foundFSsBitset.length();
-    
-      for (int b = 0; b < len; b++) {
-        b = foundFSsBitset.nextSetBit(b);
-        fssToSerialize.add(cas.getFsFromId(b));
+    cas.forAllViews(view -> {
+      processFSsForView(true,  // is enqueue
+        isSerializingDelta 
+          ? view.indexRepository.getAddedFSs().stream()
+          : view.indexRepository.<TOP>getAllIndexedFS(topType).asStream());
+      if (isSerializingDelta) {
+        // for write/delta, write out (but don't enqueue) the deleted/reindexed FSs
+        processFSsForView(false, view.indexRepository.getDeletedFSs().stream());
+        processFSsForView(false, view.indexRepository.getReindexedFSs().stream());
       }
-    }
-    
-    return;
+    });       
   }
 
   /**
@@ -2745,52 +2731,50 @@ public class BinaryCasSerDes6 implements
    */
  
     
-  private void processFSsForView(final boolean isEnqueue, final boolean isWrite,
-                                Stream<TOP> fss) {
+  private void processFSsForView(final boolean isEnqueue, Stream<TOP> fss) {
     //  prev id and entries written as a captured value in context
 
     final int prevId = 0, entriesWritten = 1;  // indexes into context
 //    Stream<TOP> stream = (fssx instanceof FSIterator<?>) 
 //        ? ((FSIterator<TOP>)fssx).asStream()
 //        : ((Set<TOP>)fssx).stream();
-    if (isWrite) {
-      final int[] context = {0, 0};  
-      fss.sorted()
-        .forEachOrdered(fs -> {
-          // skip write if typemapping, and target type isn't there
-          if (isWrite && isTypeInTgt(fs)) {
-            
-            final int tgtId = getTgtSeqFromSrcFS(fs);
-            assert(tgtId > 0);
-            final int delta = tgtId - context[prevId];
-            context[prevId] = tgtId;
-            
-            try {
-              writeVnumber(fsIndexes_dos, delta);
-            } catch (Exception e) { 
-              throw new RuntimeException(e);
-            }
-            context[entriesWritten] ++;
-            if (doMeasurements) {
-              sm.statDetails[fsIndexes_i].incr(DataIO.lengthVnumber(delta));
-            }
-          } // end of conditional write
-        
-          if (isEnqueue) {
-            enqueueFS(fs);
+
+    final int[] context = {0, 0};  
+    fss.sorted()
+      .forEachOrdered(fs -> {
+        // skip write if typemapping, and target type isn't there
+//          if (fs._id == 199) { 
+//            System.out.println("debug write out fs id 199 as 119");
+//          }
+        if (isTypeInTgt(fs)) {
+          
+          final int tgtId = getTgtSeqFromSrcFS(fs);
+          assert(tgtId > 0);
+          final int delta = tgtId - context[prevId];
+          context[prevId] = tgtId;
+          
+          try {
+            writeVnumber(fsIndexes_dos, delta);
+          } catch (Exception e) { 
+            throw new RuntimeException(e);
           }
-        });
-      try {
-        writeVnumber(control_dos, context[entriesWritten]);
-      } catch (IOException e) {
-        throw new RuntimeException(e);
-      }   
-      if (doMeasurements) {
-        sm.statDetails[typeCode_i].incr(DataIO.lengthVnumber(entriesWritten));
-      }
-    } else if (isEnqueue) {
-      // not write case, just enqueue, not sorted
-      fss.forEach(fs -> enqueueFS(fs));
+          context[entriesWritten] ++;
+          if (doMeasurements) {
+            sm.statDetails[fsIndexes_i].incr(DataIO.lengthVnumber(delta));
+          }
+        } // end of conditional write
+      
+        if (isEnqueue) {
+          enqueueFS(fs);
+        }
+      });
+    try {
+      writeVnumber(control_dos, context[entriesWritten]);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }   
+    if (doMeasurements) {
+      sm.statDetails[typeCode_i].incr(DataIO.lengthVnumber(entriesWritten));
     }
   }
   
@@ -2807,13 +2791,13 @@ public class BinaryCasSerDes6 implements
     final int id = fs._id;
     
     if (!isSerializingDelta || mark.isNew(fs)) { // separately track items below the line
-      if (!foundFSsBitset.get(id)) {
-        foundFSsBitset.set(id);
+      if (!foundFSs.contains(id)) {
+        foundFSs.add(id);
         toBeScanned.add(fs);
       }
     } else {
-      if (!foundFSsBelowMarkBitset.get(id)) {
-        foundFSsBelowMarkBitset.set(id);
+      if (!foundFSsBelowMark.contains(id)) {
+        foundFSsBelowMark.add(id);
         toBeScanned.add(fs);
       }
     }
@@ -3398,7 +3382,7 @@ public class BinaryCasSerDes6 implements
           } else if (null == refFs2) {
             return 1;
           }
-          if (refFs1._getTypeCode() == TypeSystemImpl.sofaTypeCode) {
+          if (refFs1._getTypeCode() == TypeSystemConstants.sofaTypeCode) {
             c = Integer.compare(refFs1._id,  refFs2._id);
             if (c != 0) return c; // approximate
             continue; 

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java Wed May 25 14:59:52 2016
@@ -28,12 +28,12 @@ import java.io.PrintStream;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.io.UnsupportedEncodingException;
-import java.lang.ref.WeakReference;
 import java.net.URL;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.IdentityHashMap;
@@ -44,6 +44,7 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.Consumer;
+import java.util.function.Predicate;
 
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.UIMARuntimeException;
@@ -87,6 +88,7 @@ import org.apache.uima.cas.impl.SlotKind
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.cas.text.Language;
+import org.apache.uima.internal.util.Int2ObjHashMap;
 import org.apache.uima.internal.util.IntVector;
 import org.apache.uima.internal.util.Misc;
 import org.apache.uima.internal.util.PositiveIntSet;
@@ -462,7 +464,6 @@ public class CASImpl extends AbstractCas
       // fss
       fsIdGenerator = 0;
       id2fs.clear();
-      baseCAS.resetId2fswInAllViews(); // follows id2fs.clear() above
 
       // index corruption avoidance
       fssTobeAddedback.clear();
@@ -587,7 +588,6 @@ public class CASImpl extends AbstractCas
    */
   
   private TypeSystemImpl tsi_local;
-  private ArrayList<Object> id2fsw_local;
 
   // CASImpl(TypeSystemImpl typeSystem) {
   // this(typeSystem, DEFAULT_INITIAL_HEAP_SIZE);
@@ -808,18 +808,7 @@ public class CASImpl extends AbstractCas
     } 
     this.getBaseCAS().tsi_local = ts;  
   }
-  
-  void resetId2fswInAllViews() {
-    final List<CASImpl> sn2v = this.svd.sofaNbr2ViewMap;
-    if (sn2v.size() > 0) {
-      for (CASImpl view : sn2v.subList(1, sn2v.size())) {
-        view.id2fsw_local = this.svd.id2fs.getId2fsw();
-      }
-    } 
-    this.getBaseCAS().id2fsw_local = this.svd.id2fs.getId2fsw();  
     
-  }
-  
   @Override
   public ConstraintFactory getConstraintFactory() {
     return ConstraintFactory.instance();
@@ -843,19 +832,23 @@ public class CASImpl extends AbstractCas
   
   private <T extends FeatureStructureImplC> T createFSAnnotCheck(TypeImpl ti) {
     if (ti.isAnnotationBaseType()) {
-      if (this.isBaseCas()) {    
-        throw new CASRuntimeException(CASRuntimeException.DISALLOW_CREATE_ANNOTATION_IN_BASE_CAS, ti.getName());
-      }
-      getSofaRef();  // create sofa in _InitialView if needed
+      // not here, will be checked later in AnnotationBase constructor
+//      if (this.isBaseCas()) {    
+//        throw new CASRuntimeException(CASRuntimeException.DISALLOW_CREATE_ANNOTATION_IN_BASE_CAS, ti.getName());
+//      }
+      getSofaRef();  // materialize this if not present; required for setting the sofa ref
+                     // must happen before the annotation is created, for compressed form 6 serialization order
+                     // to ensure the sofa precedes any reference to it
     }
   
+    ;
     T fs = (T) (((FsGenerator)getFsGenerator(ti.getCode())).createFS(ti, this));
     return fs;
   } 
   
   public int ll_createFSAnnotCheck(int typeCode) {
     TOP fs = createFSAnnotCheck(getTypeFromCode(typeCode));
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
   
@@ -2115,7 +2108,7 @@ public class CASImpl extends AbstractCas
       }
     }
     TOP fs = (TOP) createFS(ti);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
   
@@ -2128,7 +2121,9 @@ public class CASImpl extends AbstractCas
   private TOP createFsWithExistingId(TypeImpl ti, int id) {
     svd.reuseId = id;
     try {
-      return (TOP) createFS(ti);
+      TOP fs = createFS(ti);
+      svd.id2fs.putChange(id, fs);
+      return fs;
     } finally {
       svd.reuseId = 0;
     }
@@ -2144,7 +2139,7 @@ public class CASImpl extends AbstractCas
   @Override
   public int ll_createArray(int typeCode, int arrayLength) {
     TOP fs = createArray(getTypeFromCode_checked(typeCode), arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;      
   }
 
@@ -2171,7 +2166,7 @@ public class CASImpl extends AbstractCas
   @Override
   public int ll_createByteArray(int arrayLength) {
     TOP fs = createArray(getTypeSystemImpl().byteArrayType, arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
 
@@ -2182,7 +2177,7 @@ public class CASImpl extends AbstractCas
   @Override
   public int ll_createBooleanArray(int arrayLength) {
     TOP fs = createArray(getTypeSystemImpl().booleanArrayType, arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
 
@@ -2193,7 +2188,7 @@ public class CASImpl extends AbstractCas
   @Override
   public int ll_createShortArray(int arrayLength) {
     TOP fs = createArray(getTypeSystemImpl().shortArrayType, arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
 
@@ -2204,7 +2199,7 @@ public class CASImpl extends AbstractCas
   @Override
   public int ll_createLongArray(int arrayLength) {
     TOP fs = createArray(getTypeSystemImpl().longArrayType, arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
 
@@ -2215,7 +2210,7 @@ public class CASImpl extends AbstractCas
   @Override
   public int ll_createDoubleArray(int arrayLength) {
     TOP fs = createArray(getTypeSystemImpl().doubleArrayType, arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
 
@@ -2235,7 +2230,7 @@ public class CASImpl extends AbstractCas
       }
     }
     TOP fs = createArray(ti, arrayLength);
-    svd.id2fs.replaceWithStrongRef(fs);
+    svd.id2fs.put(fs);
     return fs._id;
   }
   
@@ -2677,8 +2672,8 @@ public class CASImpl extends AbstractCas
     case Slot_LongRef:
     case Slot_DoubleRef:
       Long lng = getLongForCode(value);
-      if (lng == null && value != 0) {
-        Misc.internalError(new Exception("ll_setIntValue got null Long/Double for non-0 handle: " + value));
+      if (lng == null) {
+        Misc.internalError(new Exception("ll_setIntValue got null Long/Double for handle: " + value));
       }
       fs._setLongValueNfcCJ(fi, lng);
       break;
@@ -2808,7 +2803,7 @@ public class CASImpl extends AbstractCas
           }
         }
       }
-    });
+    }, null, null, null);
 
     if (MEASURE_SETINT) {
       mst.scantime += System.nanoTime() - st;
@@ -3575,7 +3570,7 @@ public class CASImpl extends AbstractCas
   
   public int ll_createAnnotation(int typeCode, int begin, int end) {
     TOP fs = createAnnotation(getTypeFromCode(typeCode), begin, end);
-    setId2fs(fs);
+//    setId2fs(fs);
     return fs.id();
   }
   
@@ -3650,6 +3645,7 @@ public class CASImpl extends AbstractCas
   public <T extends Annotation> T createDocumentAnnotationNoRemoveNoIndex(int length) {
     final TypeSystemImpl ts = getTypeSystemImpl();
     AnnotationFS docAnnot = createAnnotation(ts.docType, 0, length);
+    setId2FSs(docAnnot);  // because FeaturePath uses low-level access to it
     docAnnot.setStringValue(ts.langFeat, CAS.DEFAULT_LANGUAGE_NAME);
     return (T) docAnnot;    
   }
@@ -4262,25 +4258,35 @@ public class CASImpl extends AbstractCas
     return svd.casId;
   }
   
-  final public int setId2fs(TOP fs) {
-    Id2FS l = svd.id2fs;
+  final public int getNextFsId(TOP fs) {
+//    Id2FS l = svd.id2fs;
     if (svd.reuseId != 0) {
-      l.setStrongRef(fs, svd.reuseId);
+//      l.setStrongRef(fs, svd.reuseId);
       return svd.reuseId;
     }
     
-    l.add(fs);
+//    l.add(fs);
 //    if (svd.id2fs.size() != (2 + svd.fsIdGenerator.get())) {
 //      System.out.println("debug out of sync id generator and id2fs size");
 //    }
-    assert(l.size() == (2 + svd.fsIdGenerator));
-    return getNextFsId();
+//    assert(l.size() == (2 + svd.fsIdGenerator));
+    return ++ svd.fsIdGenerator;
   }
   
-  final private int getNextFsId() {
-    return ++ svd.fsIdGenerator;
+  /**
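+   * Adds feature structures to the id-to-fs map; for test cases and code needing low-level access by id
+   * @param fss the feature structures to include in the id-to-fs map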
+   */
+  public void setId2FSs(FeatureStructure ... fss) {
+    for (FeatureStructure fs : fss) {
+      svd.id2fs.put((TOP)fs);
+    }
   }
   
+//  final private int getNextFsId() {
+//    return ++ svd.fsIdGenerator;
+//  }
+  
   final public int getLastUsedFsId() {
     return svd.fsIdGenerator;
   }
@@ -4296,38 +4302,62 @@ public class CASImpl extends AbstractCas
   }
     
   public <T extends TOP> T getFsFromId(int id) {
-    if (null == id2fsw_local) {
-      id2fsw_local = this.svd.id2fs.getId2fsw();
-    }
-    if (id < 1 || id >= id2fsw_local.size()) {
-      return null;
-    }    
-    Object o = id2fsw_local.get(id);
-    if (o == null) { 
-      return null;
-    }
-    if (o instanceof TOP) {
-      return (T) o; 
-    }
-    return (T) ((WeakReference)o).get();  // could return null if fs is gc'd    
+    return (T) this.svd.id2fs.get(id); 
   }
   
   
-  /**
-   * plus means all reachable, plus maybe others not reachable but not yet gc'd
-   * @param action -
-   */
-  public void walkReachablePlusFSsSorted(Consumer<TOP> action) {
-    this.svd.id2fs.walkReachablePlusFSsSorted(action);
-  }
+//  /**
+//   * plus means all reachable, plus maybe others not reachable but not yet gc'd
+//   * @param action -
+//   */
+//  public void walkReachablePlusFSsSorted(Consumer<TOP> action) {
+//    this.svd.id2fs.walkReachablePlusFSsSorted(action);
+//  }
   
+//  /**
+//   * called for delta serialization - walks just the new items above the line
+//   * @param action -
+//   * @param fromId - the id of the first item to walk from
+//   */
+//  public void walkReachablePlusFSsSorted(Consumer<TOP> action, int fromId) {
+//    this.svd.id2fs.walkReachablePlueFSsSorted(action, fromId);
+//  }
   /**
-   * called for delta serialization - walks just the new items above the line
-   * @param action -
-   * @param fromId - the id of the first item to walk from
-   */
-  public void walkReachablePlusFSsSorted(Consumer<TOP> action, int fromId) {
-    this.svd.id2fs.walkReachablePlueFSsSorted(action, fromId);
+   * find all of the FSs via the indexes plus what's reachable.
+   * sort into order by id,
+   * if mark is set, filter to include just those above the mark
+   * 
+   * Apply the action to those
+   * Return the (possibly filtered by mark) list of sorted FSs
+   * 
+   * @param action to perform on each item
+   * @param mark null or the mark
+   * @param includeFilter null or a filter (to exclude items not in the other type system)
+   * @param typeMapper null or how to map to other type system, used to skip things missing in other type system
+   * @return sorted list of found items (if mark is set, only new ones)
+   */
+  public List<TOP> walkReachablePlusFSsSorted(
+      Consumer<TOP> action, MarkerImpl mark, Predicate<TOP> includeFilter, CasTypeSystemMapper typeMapper) {    
+    List<TOP> all = new AllFSs(this, mark, includeFilter, typeMapper).getAllFSsSorted();
+    if (mark != null) {
+      all = filterAboveMark(all, mark);
+    }
+    for (TOP fs : all) {
+      action.accept(fs);
+    }
+    return all;
+  }
+  
+  static List<TOP> filterAboveMark(List<TOP> all, MarkerImpl mark) {
+    if (null == mark) {
+      return all;
+    }
+    int c = Collections.binarySearch(all, TOP.createSearchKey(mark.nextFSId),
+        (fs1, fs2) -> Integer.compare(fs1._id, fs2._id));
+    if (c < 0) {
+      c = (-c) - 1;
+    }
+    return all.subList(c,  all.size());
   }
   
 //  /**
@@ -4543,6 +4573,7 @@ public class CASImpl extends AbstractCas
     b.append(" t:").append(Misc.elide(fs._getTypeImpl().getShortName(), 10));    
   }
   
+  /** only used for tracing, enables tracing 2 slots for long/double */
   private FeatureImpl prevFi;
   
   void traceFSfeat(FeatureStructureImplC fs, FeatureImpl fi, Object v) {

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java Wed May 25 14:59:52 2016
@@ -138,7 +138,7 @@ public class CASSerializer implements Se
    */
   public void addCAS(CASImpl cas, boolean addMetaData) {
     BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
-    CommonSerDesSequential csds = cas.newCsds();
+    final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false);  // saves the csds in the cas
     scanAllFSsForBinarySerialization(bcsd, null, csds); // populates the arrays
     this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr);  // must follow scanAll...
     
@@ -251,7 +251,7 @@ public class CASSerializer implements Se
   public void addCAS(CASImpl cas, OutputStream ostream) {
     final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
     
-    final CommonSerDesSequential csds = cas.newCsds();
+    final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false);  // saves the csds in the cas, used for delta
     scanAllFSsForBinarySerialization(bcsd, null, csds); // populates the arrays
     
     try {

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java Wed May 25 14:59:52 2016
@@ -61,12 +61,17 @@ public class CasTypeSystemMapper {
   public final WeakReference<TypeSystemImpl> tsTgt;
   
   /** 
-   * Map from source type codes to target type codes.  
+   * Map from source types to target types.  
    * Source type code used as index, 
    * value is target type or null if the type doesn't exist in the target
    */
   final private List<TypeImpl> tSrc2Tgt = new ArrayList<>();
   
+  /** 
+   * Map from target types to source types.  
+   * Target type code used as index, 
+   * value is source type or null if the type doesn't exist in the source
+   */
   final private List<TypeImpl> tTgt2Src = new ArrayList<>();
   /**
    * Feature mapping from source to target
@@ -76,7 +81,7 @@ public class CasTypeSystemMapper {
   
   /**
    * Feature mapping from target to source 
-   *   first key is the type code, 2nd is the feature offset 
+   *   first key is the tgt type code, 2nd is the tgt feature offset 
    * Only used for type codes that are not arrays.
    * Use: When serializing a source type that exists in the target, have to output
    *   the slots in the target feature order
@@ -104,12 +109,14 @@ public class CasTypeSystemMapper {
       fSrc2Tgt = new FeatureImpl[tsSrc.getTypeArraySize()][];
       fTgt2Src = new FeatureImpl[tsTgt.getTypeArraySize()][];    
     
-      // NOTE: the "&" operator applied to booleans always evals both args
-      // See http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.22.2
-      tss = tss & addTypes(tSrc2Tgt, tsSrc, tsTgt);
-      tss = tss & addTypes(tTgt2Src, tsTgt, tsSrc);  // both directions
-      tss = tss & addFeatures(fSrc2Tgt, tsSrc, tsTgt);
-      tss = tss & addFeatures(fTgt2Src, tsTgt, tsSrc);
+      boolean b1 = addTypes(tSrc2Tgt, tsSrc, tsTgt);
+      boolean b2 = addTypes(tTgt2Src, tsTgt, tsSrc);  // both directions
+      boolean b3 = addFeatures(fSrc2Tgt, tsSrc, tsTgt);
+      boolean b4 = addFeatures(fTgt2Src, tsTgt, tsSrc);
+     
+      if (!b1 || !b2 || !b3 || !b4) {
+        tss = false;
+      }
     } else {
       fSrc2Tgt = null;
       fTgt2Src = null;
@@ -165,6 +172,16 @@ public class CasTypeSystemMapper {
     return getToFeature(fTgt2Src, tgtType, tgtFeat);
   }
   
+  /**
+   * Given a tgt type, return an array of source features in the order
+   * they would appear in the target.
+   * @param tgtType the target type whose corresponding source features are wanted
+   * @return array of corresponding source features, in target type order
+   */
+  public FeatureImpl[] getSrcFeatures(TypeImpl tgtType) {
+    return fTgt2Src[tgtType.getCode()];
+  }
+  
   public FeatureImpl getToFeature(FeatureImpl[][] mapByTypeCode, TypeImpl fromType, FeatureImpl fromFeat) {
     if (mapByTypeCode == null) { // is null if type systems ==
       return fromFeat;
@@ -180,7 +197,13 @@ public class CasTypeSystemMapper {
     return map[offset];
   }
   
-  
+  /**
+   * return true if no types are filtered
+   * @param map
+   * @param tsSrc
+   * @param tsTgt
+   * @return
+   */
   private boolean addTypes(List<TypeImpl> map, TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) {
     boolean r = true;
     for (TypeImpl tSrc : tsSrc.getAllTypes()) {

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java Wed May 25 14:59:52 2016
@@ -40,6 +40,7 @@ import org.apache.uima.jcas.cas.TOP;
  *   Creation:  
  *     non-delta serialization
  *     non-delta deserialization
+ *     for delta serialization, a previous instance is used if available, otherwise a new csds is made
  *     
  *   Reset: 
  *     CAS Reset
@@ -48,12 +49,13 @@ import org.apache.uima.jcas.cas.TOP;
  *   Logical constraints:
  *     - delta de/serialization must use an existing version of this,
  *        -- set during a previous non-delta de/serialization
+ *        -- or created just in time via a scan of the cas
  */
 public class CommonSerDesSequential {
 
   public static final boolean TRACE_SETUP = false;
   /**
-   * a map from a fs to its addr in the modeled heap
+   * a map from a fs to its addr in the modeled heap, == v2 style addr
    * 
    * created during serialization and deserialization
    * used during serialization to create addr info for index info serialization
@@ -63,7 +65,7 @@ public class CommonSerDesSequential {
   final Obj2IntIdentityHashMap<TOP> fs2addr = new Obj2IntIdentityHashMap<>(TOP.class, TOP.singleton);
 
   /**
-   * a map from the modelled FS addr to the V3 FS
+   * a map from the modelled (v2 style) FS addr to the V3 FS
    * created when serializing (non-delta), deserializing (non-delta)
    *   augmented when deserializing(delta)
    * used when deserializing (delta and non-delta)
@@ -99,14 +101,22 @@ public class CommonSerDesSequential {
   public CommonSerDesSequential(CASImpl cas) {
     this.baseCas = cas.getBaseCAS();
   }
+  
+  public boolean isEmpty() {
+    return sortedFSs.isEmpty() && pending.isEmpty();
+  }
 
   /**
    * Must call in fs sorted order
    * @param fs
    */
   void addFS(TOP fs, int addr) {
-    fs2addr.put(fs, addr);
+    addFS1(fs, addr);
     sortedFSs.add(fs);
+  }
+  
+  void addFS1(TOP fs, int addr) {
+    fs2addr.put(fs, addr);
     addr2fs.put(addr, fs);
   }
   
@@ -115,9 +125,8 @@ public class CommonSerDesSequential {
    * @param fs
    */
   void addFSunordered(TOP fs, int addr) {
-    fs2addr.put(fs, addr);
+    addFS1(fs, addr);
     pending.add(fs);
-    addr2fs.put(addr, fs);
   }  
       
   void clear() {
@@ -125,38 +134,44 @@ public class CommonSerDesSequential {
     fs2addr.clear();
     addr2fs.clear();
     pending.clear();
+    heapEnd = 0;
   }
   
-  void setup(int fromId, int fromAddr) {
+  void setup(MarkerImpl mark, int fromAddr) {
+    if (mark == null) {
+      clear();
+    }
     // local value as "final" to permit use in lambda below
     final int[] nextAddr = {fromAddr};
     if (TRACE_SETUP) System.out.println("Cmn serDes sequential setup called by: " + Misc.getCaller());
 
-    baseCas.walkReachablePlusFSsSorted(fs -> {
-      addFS(fs, nextAddr[0]);
-      if (TRACE_SETUP) {
-        System.out.format("Cmn serDes sequential setup: add FS id: %,4d addr: %,5d  type: %s%n", fs.id(), nextAddr[0], fs._getTypeImpl().getShortName());
-      }
-      nextAddr[0] += BinaryCasSerDes.getFsSpaceReq(fs, fs._getTypeImpl());  
-    }, fromId);
+    List<TOP> allAboveMark = baseCas.walkReachablePlusFSsSorted(fs -> {
+          addFS1(fs, nextAddr[0]);
+          if (TRACE_SETUP) {
+            System.out.format("Cmn serDes sequential setup: add FS id: %,4d addr: %,5d  type: %s%n", fs.id(), nextAddr[0], fs._getTypeImpl().getShortName());
+          }
+          nextAddr[0] += BinaryCasSerDes.getFsSpaceReq(fs, fs._getTypeImpl());  
+        }, mark, null, null);
+    
+    sortedFSs.addAll(allAboveMark);
     heapEnd = nextAddr[0];
 //    if (heapEnd == 0) {
 //      System.out.println("debug");
 //    }
   }
   
-  /**
-   * called to augment an existing csds with information on FSs added after the mark was set
-   * @param mark -
-   */
-  void setup() { setup(1, 1); }
-  
-  void walkSeqFSs(Consumer_T_withIOException<TOP> action) throws IOException {
-    for (TOP fs : sortedFSs) {
-      action.accept(fs);
-    }
-  }
-  
+//  /**
+//   * called to augment an existing csds with information on FSs added after the mark was set
+//   * @param mark -
+//   */
+//  void setup() { setup(1, 1); }
+  
+//  void walkSeqFSs(Consumer_T_withIOException<TOP> action) throws IOException {
+//    for (TOP fs : sortedFSs) {
+//      action.accept(fs);
+//    }
+//  }
+//  
   List<TOP> getSortedFSs() {
     if (pending.size() != 0) {
       merge();

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSIndexRepositoryImpl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSIndexRepositoryImpl.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSIndexRepositoryImpl.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSIndexRepositoryImpl.java Wed May 25 14:59:52 2016
@@ -1237,8 +1237,11 @@ public class FSIndexRepositoryImpl imple
   
   private <T extends TOP> void addFS_common(T fs, boolean isAddback) {
     TypeImpl ti = ((FeatureStructureImplC)fs)._getTypeImpl();
-    final int typeCode = ti.getCode();    
+    final int typeCode = ti.getCode();  
 
+    if (typeCode != TypeSystemConstants.sofaTypeCode && cas.isBaseCas()) {
+      throw new CASRuntimeException(CASRuntimeException.ILLEGAL_ADD_TO_INDEX_IN_BASE_CAS, fs, cas);
+    }
     // https://issues.apache.org/jira/browse/UIMA-4099
     // skip test for wrong view if addback, etc.
  

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FeatureStructureImplC.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FeatureStructureImplC.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FeatureStructureImplC.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/FeatureStructureImplC.java Wed May 25 14:59:52 2016
@@ -181,7 +181,7 @@ public class FeatureStructureImplC imple
     _intData = _allocIntData();
     _refData = _allocRefData();
 
-    _id = casView.setId2fs((TOP)this);   
+    _id = casView.getNextFsId((TOP)this);   
     
     if (traceFSs && !(this instanceof CommonArray)) {
       _casView.traceFSCreate(this);
@@ -205,7 +205,7 @@ public class FeatureStructureImplC imple
     _intData = _allocIntData();
     _refData = _allocRefData();    
     
-    _id = _casView.setId2fs((TOP)this); 
+    _id = _casView.getNextFsId((TOP)this); 
 
     if (traceFSs && !(this instanceof CommonArray)) {
       _casView.traceFSCreate(this);
@@ -506,13 +506,6 @@ public class FeatureStructureImplC imple
     if (IS_ENABLE_RUNTIME_FEATURE_VALIDATION) featureValidation(feat);
     if (IS_ENABLE_RUNTIME_FEATURE_VALUE_VALIDATION) featureValueValidation(feat, v);
 
-    if (fi.getCode() == TypeSystemConstants.annotBaseSofaFeatCode) {
-      // trying to set the sofa - don't do this, but check if the value
-      // is OK (note: may break backwards compatibility)  
-      if (v != _getFeatureValueNc(AnnotationBase._FI_sofa)) {
-        throw new CASRuntimeException(CASRuntimeException.ILLEGAL_SOFAREF_MODIFICATION);
-      }
-    }
     // no need to check for index corruption because fs refs can't be index keys
     _setRefValueCommon(fi, v);
     _casView.maybeLogUpdate(this, fi);
@@ -949,42 +942,42 @@ public class FeatureStructureImplC imple
       return;
     }
     switch (_getTypeCode()) {
-    case TypeSystemImpl.stringArrayTypeCode: {
+    case TypeSystemConstants.stringArrayTypeCode: {
       StringArray a = (StringArray) this;
       printArrayElements(a.size(), i -> a.get(i), indent, buf);
       return;
     }
-    case TypeSystemImpl.intArrayTypeCode: {
+    case TypeSystemConstants.intArrayTypeCode: {
       IntegerArray a = (IntegerArray) this;
       printArrayElements(a.size(), i -> Integer.toString(a.get(i)), indent, buf);
       return;
     }
-    case TypeSystemImpl.floatArrayTypeCode: {
+    case TypeSystemConstants.floatArrayTypeCode: {
       FloatArray a = (FloatArray) this;
       printArrayElements(a.size(), i -> Float.toString(a.get(i)), indent, buf);
       return;
     }
-    case TypeSystemImpl.booleanArrayTypeCode: {
+    case TypeSystemConstants.booleanArrayTypeCode: {
       BooleanArray a = (BooleanArray) this;
       printArrayElements(a.size(), i -> Boolean.toString(a.get(i)), indent, buf);
       return;
     }
-    case TypeSystemImpl.byteArrayTypeCode: {
+    case TypeSystemConstants.byteArrayTypeCode: {
       ByteArray a = (ByteArray) this;
       printArrayElements(a.size(), i -> Byte.toString(a.get(i)), indent, buf);
       return;
     }
-    case TypeSystemImpl.shortArrayTypeCode: {
+    case TypeSystemConstants.shortArrayTypeCode: {
       ShortArray a = (ShortArray) this;
       printArrayElements(a.size(), i -> Short.toString(a.get(i)), indent, buf);
       return;
     }
-    case TypeSystemImpl.longArrayTypeCode: {
+    case TypeSystemConstants.longArrayTypeCode: {
       LongArray a = (LongArray) this;
       printArrayElements(a.size(), i -> Long.toString(a.get(i)), indent, buf);
       return;
     }
-    case TypeSystemImpl.doubleArrayTypeCode: {
+    case TypeSystemConstants.doubleArrayTypeCode: {
       DoubleArray a = (DoubleArray) this;
       printArrayElements(a.size(), i -> Double.toString(a.get(i)), indent, buf);
       return;
@@ -1155,13 +1148,13 @@ public class FeatureStructureImplC imple
     TypeImpl range = fi.getRangeImpl();
     if (fi.isInInt) {
       switch (range.getCode()) {
-      case TypeSystemImpl.floatTypeCode :
+      case TypeSystemConstants.floatTypeCode :
         return Float.toString(getFloatValue(feat));
-      case TypeSystemImpl.booleanTypeCode :
+      case TypeSystemConstants.booleanTypeCode :
         return Boolean.toString(getBooleanValue(feat));
-      case TypeSystemImpl.longTypeCode :
+      case TypeSystemConstants.longTypeCode :
         return Long.toString(getLongValue(feat));
-      case TypeSystemImpl.doubleTypeCode :
+      case TypeSystemConstants.doubleTypeCode :
         return Double.toString(getDoubleValue(feat));
       default: // byte, short, int, 
         return Integer.toString(getIntValue(feat));
@@ -1172,7 +1165,7 @@ public class FeatureStructureImplC imple
       return getStringValue(feat);
     }
     
-    if (range.getCode() == TypeSystemImpl.javaObjectTypeCode) {
+    if (range.getCode() == TypeSystemConstants.javaObjectTypeCode) {
       return CASImpl.serializeJavaObject(getJavaObjectValue(feat));
     }
     
@@ -1298,25 +1291,25 @@ public class FeatureStructureImplC imple
 
     /* The assignment is stricter than the Java rules - must match */
     switch (rangeTypeCode) {
-    case TypeSystemImpl.booleanArrayTypeCode:
+    case TypeSystemConstants.booleanArrayTypeCode:
       return v instanceof BooleanArray;
-    case TypeSystemImpl.byteArrayTypeCode:
+    case TypeSystemConstants.byteArrayTypeCode:
     return v instanceof ByteArray;
-    case TypeSystemImpl.shortArrayTypeCode:
+    case TypeSystemConstants.shortArrayTypeCode:
       return v instanceof ShortArray;
-    case TypeSystemImpl.intArrayTypeCode:
+    case TypeSystemConstants.intArrayTypeCode:
       return v instanceof IntegerArray;
-    case TypeSystemImpl.floatArrayTypeCode:
+    case TypeSystemConstants.floatArrayTypeCode:
       return v instanceof FloatArray;
-    case TypeSystemImpl.longArrayTypeCode:
+    case TypeSystemConstants.longArrayTypeCode:
       return v instanceof LongArray;
-    case TypeSystemImpl.doubleArrayTypeCode:
+    case TypeSystemConstants.doubleArrayTypeCode:
       return v instanceof DoubleArray;
-    case TypeSystemImpl.stringArrayTypeCode:
+    case TypeSystemConstants.stringArrayTypeCode:
       return v instanceof StringArray;
-    case TypeSystemImpl.javaObjectArrayTypeCode:
+    case TypeSystemConstants.javaObjectArrayTypeCode:
       return v instanceof JavaObjectArray;
-    case TypeSystemImpl.fsArrayTypeCode:
+    case TypeSystemConstants.fsArrayTypeCode:
       return v instanceof FSArray;
     }
     

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Id2FS.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Id2FS.java?rev=1745499&r1=1745498&r2=1745499&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Id2FS.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Id2FS.java Wed May 25 14:59:52 2016
@@ -19,11 +19,11 @@
 
 package org.apache.uima.cas.impl;
 
-import java.lang.ref.WeakReference;
-import java.util.ArrayList;
-import java.util.function.Consumer;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
 
-import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.internal.util.Int2ObjHashMap;
 import org.apache.uima.internal.util.Misc;
 import org.apache.uima.jcas.cas.TOP;
 
@@ -31,149 +31,265 @@ import org.apache.uima.jcas.cas.TOP;
  * A map from ints representing FS id's (or "addresses") to those FSs
  * There is one map instance per CAS (all views).
  * 
- * The values are weak references, to allow gc
+ * The map is not populated, normally.  It is only populated when there is a need to
+ *   be able to map from the id to the FS, or to prevent the FS from being GC'd
+ *   -- The low level CAS apis for creating FSs have this need, because they return the id, and this
+ *      won't prevent the FS from being GC'd.  
  * 
- * New additions always have increasing int keys.
- * 
- * IN THIS IMPL, the id is the index into the array.
- * IN THIS IMPL, Searching is by simple index lookup in an ArrayList
- * 
- * Removes not supported; the weak refs allow garbage collection to reclaim the feature structure space.
- * 
- * Alternative: a map based on sorted arrays, searched by binary search
+ * Individual removes are not supported; entries are dropped only when the map is reset / cleared
  */
 public class Id2FS {
+  static final boolean MEASURE = false;
+  private static final int MEASURE_STACK_SIZE = 10;
+  private static Map<MeasureCaller, MeasureCaller> callers = MEASURE ? new HashMap<>() : null;
+  private static Map<MeasureCaller, MeasureCaller> walkableCallers = MEASURE ? new HashMap<>() : null;
+
+//  /**
+//   * Set this JVM property to true for backwards compatibility, where an application retains
+//   * some references to Feature Structures held only using the low-level references (which are ints).
+//   */
+//  public static final String DISABLE_FS_GC = "uima.disable_feature_structure_garbage_collection";
+//  
+//  public static final boolean IS_DISABLE_FS_GC =   // true || // disabled due to performance
+//      Misc.getNoValueSystemProperty(DISABLE_FS_GC);
   
-  /**
-   * Set this JVM property to true for backwards compatibility, where an application retains
-   * some references to Feature Structures held only using the low-level references (which are ints)..
-   */
-  public static final String DISABLE_FS_GC = "uima.disable_feature_structure_garbage_collection";
-  
-  public static final boolean IS_DISABLE_FS_GC =   // true || // debug
-      Misc.getNoValueSystemProperty(DISABLE_FS_GC);
-  
-  final private ArrayList<Object> id2fsw;
+  final private Int2ObjHashMap<TOP> id2fs;
+  final private int initialSize;
+    
+  public Id2FS(int initialHeapSize) {
+    this.initialSize = Math.max(32, initialHeapSize >> 4);  // won't shrink below this
+    id2fs = new Int2ObjHashMap(TOP.class, initialSize); 
+  }
+
+  void put(int id, TOP fs) {
+    TOP prev = id2fs.put(id, fs);
+    assert prev == null;
+  }
+  
+  void putChange(int id, TOP fs) {
+    TOP prev = id2fs.put(id, fs);
+    assert prev != null;  // changing a preexisting value
+  }
+  
+  void put (TOP fs) {
+    put (fs._id, fs);
+  }
+  
+  TOP get(int id) {
+    return id2fs.get(id);
+  }
+  
+//  /**
+//   * @param fs -
+//   */
+//  public void add(TOP fs) {
+//    id2fsw.add( 
+//        null  // experiment - hangs
+////        IS_DISABLE_FS_GC 
+////                ? fs 
+////                : new WeakReference<TOP>(fs)
+//                );
+//    maxSize ++;  // tracked for computing shrinking upon clear() call
+//  }
+  
+//  public void setStrongRef(TOP fs, int i) {
+//    id2fsw.set(i, fs);
+//  }
+  
+//  public void replaceWithStrongRef(TOP fs) {
+//    if (IS_DISABLE_FS_GC) {
+//      return;
+//    } 
+//    id2fsw.set(fs._id, fs);  
+//  }
+       
+  int size() {
+    return id2fs.size(); 
+  }
   
-  public Id2FS(int initialHeapSize) {  
-    id2fsw = new ArrayList<>(initialHeapSize >> 4);  
-    id2fsw.add(null);  // because id's start with 1
+  Int2ObjHashMap<TOP> getId2fs() {
+    return id2fs;
   }
   
   /**
-   * @param fs -
+   * clears the map; shrinking the underlying storage after it grew beyond the reset heap size value is disabled for now
    */
-  public void add(TOP fs) {
-    id2fsw.add(IS_DISABLE_FS_GC ? fs : new WeakReference<TOP>(fs));
+  void clear() {
+    id2fs.clear();
+  // disabled for now
+  // use common routine in Misc if re-enabling
+  
+//    secondTimeShrinkable = Misc.maybeShrink(
+//        secondTimeShrinkable, id2fsw.size(), Misc.nextHigherPowerOf2(maxSize), 2, initialSize,
+//        newCapacity -> {
+//          id2fsw = new ArrayList<>(newCapacity);
+//        },
+//        () -> {
+//          id2fsw.clear();
+//        });   
+//    id2fsw.add(null); // so that ids start at 1  
+       
+//    if (id2fsw.size() > (CASImpl.DEFAULT_RESET_HEAP_SIZE >> 4)) {
+//      id2fsw.clear();
+//      id2fsw.add(null); // so that ids start at 1
+//      id2fsw.trimToSize();  
+//      id2fsw.ensureCapacity(CASImpl.DEFAULT_INITIAL_HEAP_SIZE >> 4);     
+//    } else {
+//      id2fsw.clear();
+//      id2fsw.add(null); // so that ids start at 1      
+//    }
   }
   
-  public void setStrongRef(TOP fs, int i) {
-    id2fsw.set(i, fs);
-  }
+//  /**
+//   * plus means all reachable, plus maybe others not reachable but not yet gc'd
+//   * @param action
+//   */
+//  void walkReachablePlusFSsSorted(Consumer<TOP> action) {
+//    walkReachablePlueFSsSorted(action, 1);
+//  }
+  
+//  /**
+//   * walk a part of the id2fsw list; for delta, just the part above the line
+//   * @param action
+//   * @param items the part of the id2fsw list to walk
+//   */
+//  void walkReachablePlueFSsSorted(Consumer<TOP> action, int fromId) {
+//    
+////    int i;
+////    if (fromId == 1) {
+////      i = fromId;
+////    } else {
+////      TOP holdkey = TOP.createSearchKey(fromId); // hold to keep from getting GC'd
+////      WeakReference<TOP> key = new WeakReference<TOP>(holdkey);
+////      i = Collections.binarySearch(id2fsw, key, new Comparator<WeakReference<TOP>>() {
+////        @Override
+////        public int compare(WeakReference<TOP> o1, WeakReference<TOP> o2) {
+////          TOP k1 = o1.get();
+////          if (k1 == null) return -1;
+////          return k1.compareTo(holdkey);
+////        }
+////      });
+////      
+////      if (i < 0) {
+////        i = -(i + 1); // i is (-(insertion point) - 1) 
+////      }
+////    }
+//    // in this impl, the id is the index.
+//    if (MEASURE) {
+//      trace(walkableCallers);
+//    }
+//    
+//    final int sz = id2fs.size();
+//    for (int i = fromId; i < sz; i++) {
+//      Object o = id2fs.get(i);
+//      if (o == null) { 
+//        continue;
+//      }
+//      
+//      if (o instanceof TOP) {
+//        action.accept((TOP)o);
+//      } else {
+//        TOP fs = ((WeakReference<TOP>)o).get();
+//        if (fs == null) {
+////          id2fs.set(i, null);
+//          continue;
+//        }
+//        action.accept(fs);
+//      }
+//    }   
+//  }
   
-  public void replaceWithStrongRef(TOP fs) {
-    if (IS_DISABLE_FS_GC) {
-      return;
-    } 
-    id2fsw.set(fs._id, fs);  
-  }
- 
-  public <T extends TOP> T get(int id) {
-    if (id < 1 || id >= id2fsw.size()) {
-      /** The Feature Structure ID {0} is invalid.*/
-      throw new CASRuntimeException(CASRuntimeException.INVALID_FS_ID, id);
-    }  
-    return getNoCheck(id);
+  void traceWeakGets() {
+    trace(callers);
   }
   
-  public <T extends TOP> T getWithMissingIsNull(int id) {
-    if (id < 1 || id >= id2fsw.size()) {
-      return null;
-    }    
-    return getNoCheck(id);  // could return null if fs is gc'd
+  void trace(Map<MeasureCaller, MeasureCaller> map) {
+    synchronized (map) {
+      StackTraceElement[] e = Thread.currentThread().getStackTrace();
+      MeasureCaller k = new MeasureCaller();
+      for (int i = 3, j = 0; i < e.length; i++, j++) {
+        if ( j >= MEASURE_STACK_SIZE) break;
+        k.className[j] = e[i].getClassName();
+        k.methodName[j] = e[i].getMethodName();
+        k.lineNumber[j] = e[i].getLineNumber();
+      }
+      MeasureCaller prev = map.putIfAbsent(k, k);
+      if (null != prev) {
+        prev.count++;
+      }
+    }
   }
   
-  private <T extends TOP> T getNoCheck(int id) {
-    Object o = id2fsw.get(id);
-    if (o == null) { 
-      return null;
+  private static class MeasureCaller {
+    int count = 1;
+    String[] className = new String[MEASURE_STACK_SIZE];
+    String[] methodName = new String[MEASURE_STACK_SIZE];
+    int[] lineNumber = new int[MEASURE_STACK_SIZE];
+    /* (non-Javadoc)
+     * @see java.lang.Object#hashCode()
+     */
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + Arrays.hashCode(className);
+      result = prime * result + Arrays.hashCode(lineNumber);
+      result = prime * result + Arrays.hashCode(methodName);
+      return result;
     }
-    if (o instanceof TOP) {
-      return (T) o; 
-    }
-    return (T) ((WeakReference)o).get();  // could return null if fs is gc'd    
+    /* (non-Javadoc)
+     * @see java.lang.Object#equals(java.lang.Object)
+     */
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) {
+        return true;
+      }
+      if (obj == null) {
+        return false;
+      }
+      if (!(obj instanceof MeasureCaller)) {
+        return false;
+      }
+      MeasureCaller other = (MeasureCaller) obj;
+      if (!Arrays.equals(className, other.className)) {
+        return false;
+      }
+      if (!Arrays.equals(lineNumber, other.lineNumber)) {
+        return false;
+      }
+      if (!Arrays.equals(methodName, other.methodName)) {
+        return false;
+      }
+      return true;
+    }   
   }
+
+  private static void dumpCallers(String title, Map<MeasureCaller, MeasureCaller> map) {
+    System.out.println(title + ": size:" + map.size());
+    MeasureCaller[] a = map.keySet().toArray(new MeasureCaller[map.size()]);
+    Arrays.sort(a, (c1, c2) -> - Integer.compare(c1.count, c2.count));
     
-  int size() {
-    return id2fsw.size(); 
+    for (MeasureCaller c : a) {
+      StringBuilder sb = new StringBuilder();
+      
+      for (int i = 0; i < MEASURE_STACK_SIZE; i++) {
+        if ( c.className[i] == null) break;
+        if (i != 0) sb.append(", ");
+        sb.append(Misc.formatcaller(c.className[i], c.methodName[i], c.lineNumber[i]));
+      }
+    
+      System.out.format("count: %,d, %s%n", c.count, sb);
+    }    
   }
   
-  /**
-   * adjusts the underlying array down in size if grew beyond the reset heap size value
-   */
-  void clear() {
-    if (id2fsw.size() > (CASImpl.DEFAULT_RESET_HEAP_SIZE >> 4)) {
-      id2fsw.clear();
-      id2fsw.add(null); // so that ids start at 1
-      id2fsw.trimToSize();  
-      id2fsw.ensureCapacity(CASImpl.DEFAULT_INITIAL_HEAP_SIZE >> 4);     
-    } else {
-      id2fsw.clear();
-      id2fsw.add(null); // so that ids start at 1      
+  static {
+    if (MEASURE) {
+      Runtime.getRuntime().addShutdownHook(new Thread(null, () -> {
+        dumpCallers("Callers of getId with weak ref", callers);
+        dumpCallers("Callers of walkReachablePlueFSsSorted", walkableCallers);        
+      }, "Dump id2fs weak"));
     }
   }
-  
-  /**
-   * plus means all reachable, plus maybe others not reachable but not yet gc'd
-   * @param action
-   */
-  void walkReachablePlusFSsSorted(Consumer<TOP> action) {
-    walkReachablePlueFSsSorted(action, 1);
-  }
-  
-  /**
-   * walk a part of the id2fsw list; for delta, just the part above the line
-   * @param action
-   * @param items the part of the id2fsw list to walk
-   */
-  void walkReachablePlueFSsSorted(Consumer<TOP> action, int fromId) {
-//    int i;
-//    if (fromId == 1) {
-//      i = fromId;
-//    } else {
-//      TOP holdkey = TOP.createSearchKey(fromId); // hold to kep from getting GC'd
-//      WeakReference<TOP> key = new WeakReference<TOP>(holdkey);
-//      i = Collections.binarySearch(id2fsw, key, new Comparator<WeakReference<TOP>>() {
-//        @Override
-//        public int compare(WeakReference<TOP> o1, WeakReference<TOP> o2) {
-//          TOP k1 = o1.get();
-//          if (k1 == null) return -1;
-//          return k1.compareTo(holdkey);
-//        }
-//      });
-//      
-//      if (i < 0) {
-//        i = -(i + 1); // i is (-(insertion point) - 1) 
-//      }
-//    }
-    // in this impl, the id is the index.
-    final int sz = id2fsw.size();
-    for (int i = fromId; i < sz; i++) {
-      Object o = id2fsw.get(i);
-      if (o == null) { 
-        continue;
-      }
-      
-      if (o instanceof TOP) {
-        action.accept((TOP)o);
-      } else {
-        TOP fs = ((WeakReference<TOP>)o).get();
-        if (fs == null) {
-          id2fsw.set(i, null);
-          continue;
-        }
-        action.accept(fs);
-      }
-    }   
-  }
+
 }