You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2016/09/07 21:33:09 UTC
svn commit: r1759710 - in
/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl:
BinaryCasSerDes.java CASSerializer.java CommonSerDesSequential.java
Author: schor
Date: Wed Sep 7 21:33:09 2016
New Revision: 1759710
URL: http://svn.apache.org/viewvc?rev=1759710&view=rev
Log:
[UIMA-4663] clarify purpose of values stored in BinaryCasSerDes (only for plain binary) vs CommonSerDesSequential, and model the aux arrays for deserialization for binary.
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java
Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java?rev=1759710&r1=1759709&r2=1759710&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java Wed Sep 7 21:33:09 2016
@@ -64,10 +64,17 @@ import org.apache.uima.util.CasLoadMode;
* Binary (mostly non compressed) CAS deserialization
* The methods in this class were originally part of the CASImpl, and were moved here to this class for v3
*
- * Binary non compressed CAS serialization is in class CASSerializer, but that class uses routine and data structures
+ * Binary non compressed CAS serialization is in class CASSerializer, but that class uses routines and data structures
* in this class.
*
- * There is one instance of this class per CAS (shared by all views of that CAS).
+ * There is one instance of this class per CAS (shared by all views of that CAS), created at the same time the
+ * CAS is created.
+ *
+ * This instance also holds data needed for binary serialization, and deserialization.
+ * For binary delta deserialization, it uses the data computed on a previous serialization,
+ * or, if none, it re-computes it. See scanAllFSsForBinarySerialization method.
+ *
+ * The data is computed lazily, and reset with cas reset.
*
* Lifecycle:
* created when a CAS (any view) is first created, as part of the shared view data for that CAS.
@@ -75,8 +82,6 @@ import org.apache.uima.util.CasLoadMode;
*
* Data created when non-delta serializing, in case needed when delta-deserializing later:
* xxxAuxAddr2fsa maps aux arrays to FSs
- *
- * Data created when non-delta deserializing, in case needed when delta-serializing later:
* heaps and nextXXXHeapAddrAfterMark (in this case mark is the end).
*
* Reset:
@@ -176,10 +181,13 @@ public class BinaryCasSerDes {
/**
* Map from an aux addr starting address for an array of boolean/byte/short/long/double to the V3 FS.
+ * key = simulated starting address in aux heap for the array
+ * value = FS having that array
* When deserializing a modification, used to find the v3 FS and the offset in the array to modify.
*
- * created when serializing (in case receive delta deser back)
- * updated when delta deserializing
+ * created when serializing (in case receive delta deser back).
+ * created when delta deserializing if not available from previous serialization.
+ * updated when delta deserializing.
* reset at end of delta deserializings because multiple mods not supported
*/
final private Int2ObjHashMap<TOP> byteAuxAddr2fsa = new Int2ObjHashMap<>(TOP.class);
@@ -523,7 +531,7 @@ public class BinaryCasSerDes {
if (h.isCompressed) {
if (TRACE_DESER) {
- System.out.format("BinDeser version = %d%n", h.v);
+ System.out.format("BinDeser version = %d%n", Integer.valueOf(h.v));
}
if (h.form4) {
(new BinaryCasSerDes4(baseCas.getTypeSystemImpl(), false))
@@ -603,7 +611,7 @@ public class BinaryCasSerDes {
* CAS(2) has updates - new FSs, and mods to existing ones
* CAS(2) -> delta binary ser -> delta binary deser -> CAS(1).
*
- * V3 supports the above scenario by retaining information in CAS(2) at the
+ * V3 supports the above scenario by retaining some information in CAS(2) at the
* end of the initial deserialization, including the model heap size/cellsUsed.
* - this is needed to properly do a compatible-with-v2 delta serialization.
@@ -616,13 +624,12 @@ public class BinaryCasSerDes {
*
* This method assumes a previous binary serialization was done, and the following data structures
* are still valid (i.e. no CAS altering operations have been done)
- * (maybe these are reset: heap, stringHeap, byteHeap, shortHeap, longHeap)
- * csds, [string/byte/short/long]auxAddr2fs (for array mods)
+ * (these are reset: heap, stringHeap, byteHeap, shortHeap, longHeap)
+ * csds,
+ * [string/byte/short/long]auxAddr2fs (for array mods)
* nextHeapAddrAfterMark, next[string/byte/short/long]HeapAddrAfterMark
*
- * @param dis data input stream
- * @param swap true if byte order needs swapping
- * @param delta true if delta binary deserialization being received
+ * @param h the Header (read by the caller)
* @return the format of the incoming serialized data
*/
private SerialFormat binaryDeserialization(Header h) {
@@ -664,7 +671,7 @@ public class BinaryCasSerDes {
}
if (TRACE_DESER) {
System.out.format("BinDes Plain %s startPos: %,d mainHeapSize: %d%n",
- delta ? "Delta" : "", startPos, fsheapsz);
+ delta ? "Delta" : "", Integer.valueOf(startPos), Integer.valueOf(fsheapsz));
}
// add new heap slots
@@ -739,7 +746,7 @@ public class BinaryCasSerDes {
modWords[i] = r.readInt();
}
if (TRACE_DESER) {
- System.out.format("BinDes modified heap slot count: %,d%n", fsmodssz2 / 2);
+ System.out.format("BinDes modified heap slot count: %,d%n", Integer.valueOf(fsmodssz2 / 2));
}
} else {
fsmodssz2 = 0; // not used but must be set to make "final" work
@@ -750,19 +757,19 @@ public class BinaryCasSerDes {
// indexed FSs
int fsindexsz = r.readInt();
int[] fsindexes = new int[fsindexsz];
- if (TRACE_DESER) System.out.format("BinDes indexedFSs count: %,d%n", fsindexsz);
+ if (TRACE_DESER) System.out.format("BinDes indexedFSs count: %,d%n", Integer.valueOf(fsindexsz));
for (int i = 0; i < fsindexsz; i++) {
fsindexes[i] = r.readInt();
if (TRACE_DESER) {
- if (i % 5 == 0) System.out.format("%n i: %5d ", i);
- System.out.format("%15d ", fsindexes[i]);
+ if (i % 5 == 0) System.out.format("%n i: %5d ", Integer.valueOf(i));
+ System.out.format("%15d ", Integer.valueOf(fsindexes[i]));
}
}
if (TRACE_DESER) System.out.println("");
// byte heap
int heapsz = r.readInt();
- if (TRACE_DESER) System.out.format("BinDes ByteHeap size: %,d%n", heapsz);
+ if (TRACE_DESER) System.out.format("BinDes ByteHeap size: %,d%n", Integer.valueOf(heapsz));
if (!delta) {
byteHeap.heap = new byte[Math.max(16, heapsz)]; // must be > 0
@@ -778,7 +785,7 @@ public class BinaryCasSerDes {
// short heap
heapsz = r.readInt();
- if (TRACE_DESER) System.out.format("BinDes ShortHeap size: %,d%n", heapsz);
+ if (TRACE_DESER) System.out.format("BinDes ShortHeap size: %,d%n", Integer.valueOf(heapsz));
if (!delta) {
shortHeap.heap = new short[Math.max(16, heapsz)]; // must be > 0
@@ -800,7 +807,7 @@ public class BinaryCasSerDes {
// long heap
heapsz = r.readInt();
- if (TRACE_DESER) System.out.format("BinDes LongHeap size: %,d%n", heapsz);
+ if (TRACE_DESER) System.out.format("BinDes LongHeap size: %,d%n", Integer.valueOf(heapsz));
if (!delta) {
longHeap.heap = new long[Math.max(16, heapsz)]; // must be > 0
@@ -927,10 +934,7 @@ public class BinaryCasSerDes {
longHeap = null;
// cleared because only used for delta deser, for mods, and mods not allowed for multiple deltas
- byteAuxAddr2fsa.clear();
- shortAuxAddr2fsa.clear();
- longAuxAddr2fsa.clear();
-
+ clearAuxAddr2fsa();
} catch (IOException e) {
String msg = e.getMessage();
if (msg == null) {
@@ -1323,18 +1327,20 @@ public class BinaryCasSerDes {
/**
- * Called when serializing a cas.
+ * Called when serializing a cas, or deserializing a delta CAS, if not saved in that case from a previous
+ * binary serialization (in that case, the scan is done as if it is doing a non-delta serialization).
*
* Initialize the serialization model for binary serialization in CASSerializer from a CAS
* Do 2 scans, each by walking all the reachable FSs
* - The first one processes all fs (including for delta, those below the line)
* -- computes the fs to addr map and its inverse, based on the size of each FS.
+ * -- done by CommonSerDesSequential class's "setup" method
*
* - The second one computes the values of the main and aux heaps and string heaps except for delta mods
* -- for delta, the heaps only have "new" values that binary serialization will write out as arrays
* --- mods are computed from FsChange info and added to the appropriate heaps, later
*
- * - for byte/short/long/string array use, compute auxAddr2fsa map.
+ * - for byte/short/long/string array use, compute auxAddr2fsa maps.
* This is used when deserializing delta mod info, to locate the fs to update
*
* For delta serialization, the heaps are populated only with the new values.
@@ -1350,16 +1356,20 @@ public class BinaryCasSerDes {
* @param csds the CommonSerDesSequential instance holding the fs2addr map and the sorted FSs used by the scan
* @param mark null or the mark to use for separating the new from the previously existing
* used by delta cas.
+ * @return null or for delta, all the found FSs
*/
- void scanAllFSsForBinarySerialization(MarkerImpl mark, CommonSerDesSequential csds) {
+ List<TOP> scanAllFSsForBinarySerialization(MarkerImpl mark, CommonSerDesSequential csds) {
final boolean isMarkSet = mark != null;
+ List<TOP> all = null;
+ int prevHeapEnd = csds.getHeapEnd(); // used if mark is set
if (isMarkSet) {
- csds.setup(mark, csds.getHeapEnd()); // add new stuff to existing csds
- } // otherwise, it's set up using null, 1 as the arguments
+
+ all = csds.setup(mark, csds.getHeapEnd()); // add new stuff to existing csds
+ } // otherwise, it's set up already, using null, 1 as the arguments, when getCsds() is called
// For delta, these heaps will start at 1, and only hold new items
- heap = new Heap(csds.getHeapEnd());
+ heap = new Heap(isMarkSet ? (1 + csds.getHeapEnd() - prevHeapEnd) : csds.getHeapEnd());
byteHeap = new ByteHeap();
shortHeap = new ShortHeap();
longHeap = new LongHeap();
@@ -1367,18 +1377,31 @@ public class BinaryCasSerDes {
if (!isMarkSet) {
clearDeltaOffsets(); // set nextXXheapAfterMark to 0;
+ clearAuxAddr2fsa();
}
- List<TOP> itemsToExtract = isMarkSet ? CASImpl.filterAboveMark(csds.getSortedFSs(), mark) : csds.getSortedFSs();
+ List<TOP> itemsToExtract = csds.getSortedFSs();
+// isMarkSet ? CASImpl.filterAboveMark(csds.getSortedFSs(), mark) : csds.getSortedFSs();
for (TOP fs : itemsToExtract) {
if (!isMarkSet || mark.isNew(fs)) {
// skip extraction for FSs below the mark.
// - updated slots will update aux heaps when delta mods are processed
extractFsToV2Heaps(fs, isMarkSet, csds.fs2addr);
}
- }
+ }
+
+ return all;
}
+// /**
+// * to support serializing addr in aux arrays for modifications below the mark,
+// * scan to compute the starting address of each array that's below the mark
+// * and build maps from Array FSs to aux array starting addresses
+// */
+// void scanAllFSsForBinaryDeltaSerialization(MarkerImpl mark, CommonSerDesSequential csds) {
+//
+// }
+//
/**
* called in fs._id order to populate heaps from all FSs.
*
@@ -1422,7 +1445,8 @@ public class BinaryCasSerDes {
case Slot_BooleanRef: {
int baAddr = byteHeap .addBooleanArray(((BooleanArray)fs)._getTheArray());
- heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+ heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+ byteAuxAddr2fsa.put(nextByteHeapAddrAfterMark + baAddr, fs);
// // hack to find first above-the-mark ref
// if (isMarkSet && baAddr < nextByteHeapAddrAfterMark) {
// nextByteHeapAddrAfterMark = baAddr;
@@ -1432,7 +1456,8 @@ public class BinaryCasSerDes {
case Slot_ByteRef: {
int baAddr = byteHeap .addByteArray (((ByteArray )fs)._getTheArray());
- heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+ heap.heap[i] = nextByteHeapAddrAfterMark + baAddr;
+ byteAuxAddr2fsa.put(nextByteHeapAddrAfterMark + baAddr, fs);
// // hack to find first above-the-mark ref
// if (isMarkSet && baAddr < nextByteHeapAddrAfterMark) {
// nextByteHeapAddrAfterMark = baAddr;
@@ -1442,17 +1467,20 @@ public class BinaryCasSerDes {
case Slot_ShortRef: {
int saAddr = shortHeap.addShortArray (((ShortArray )fs)._getTheArray());
heap.heap[i] = nextShortHeapAddrAfterMark + saAddr;
+ shortAuxAddr2fsa.put(nextShortHeapAddrAfterMark + saAddr, fs);
}
break;
case Slot_LongRef: {
int laAddr = longHeap .addLongArray (((LongArray )fs)._getTheArray());
heap.heap[i] = nextLongHeapAddrAfterMark + laAddr;
+ longAuxAddr2fsa.put(nextLongHeapAddrAfterMark + laAddr, fs);
break;
}
case Slot_DoubleRef: {
int laAddr = longHeap .addDoubleArray (((DoubleArray )fs)._getTheArray());
heap.heap[i] = nextLongHeapAddrAfterMark + laAddr;
+ longAuxAddr2fsa.put(nextLongHeapAddrAfterMark + laAddr, fs);
break;
}
case Slot_HeapRef:
@@ -1533,7 +1561,7 @@ public class BinaryCasSerDes {
for (int heapIndex = startPos; heapIndex < heapsz; heapIndex += getFsSpaceReq(fs, type)) {
type = tsi.getTypeForCode(heap.heap[heapIndex]);
if (type == null) {
- throw new CASRuntimeException(CASRuntimeException.deserialized_type_not_found, heap.heap[heapIndex]);
+ throw new CASRuntimeException(CASRuntimeException.deserialized_type_not_found, Integer.valueOf(heap.heap[heapIndex]));
}
if (type.isArray()) {
final int len = heap.heap[heapIndex + arrayLengthFeatOffset];
@@ -1856,4 +1884,22 @@ public class BinaryCasSerDes {
nextShortHeapAddrAfterMark = 0;
nextLongHeapAddrAfterMark = 0;
}
+
+ private void clearAuxAddr2fsa() {
+ byteAuxAddr2fsa.clear();
+ shortAuxAddr2fsa.clear();
+ longAuxAddr2fsa.clear();
+ }
+ /**
+ * called by cas reset
+ */
+ public void clear() {
+ clearDeltaOffsets();
+ clearAuxAddr2fsa();
+ heap = null;
+ byteHeap = null;
+ shortHeap = null;
+ longHeap = null;
+ stringHeap = null;
+ }
}
\ No newline at end of file
Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java?rev=1759710&r1=1759709&r2=1759710&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java Wed Sep 7 21:33:09 2016
@@ -23,17 +23,20 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
+import java.util.ArrayList;
import java.util.BitSet;
+import java.util.List;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.Marker;
-import org.apache.uima.cas.function.IntConsumer_withIOException;
+import org.apache.uima.cas.function.Consumer_T_withIOException;
import org.apache.uima.cas.impl.CASImpl.FsChange;
-import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.cas.impl.SlotKinds.SlotKind;
import org.apache.uima.internal.util.Misc;
import org.apache.uima.internal.util.Obj2IntIdentityHashMap;
import org.apache.uima.jcas.cas.BooleanArray;
import org.apache.uima.jcas.cas.ByteArray;
+import org.apache.uima.jcas.cas.CommonArray;
import org.apache.uima.jcas.cas.DoubleArray;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.FloatArray;
@@ -84,6 +87,15 @@ public class CASSerializer implements Se
static final long serialVersionUID = -7972011651957420295L;
+ static class AddrPlusValue {
+ final int addr; // heap or aux heap addr
+ final long value; // boolean, byte, short, long, double value
+ AddrPlusValue(int addr, long value) {
+ this.addr = addr;
+ this.value = value;
+ }
+ }
+
// The heap itself.
public int[] heapArray = null;
@@ -140,7 +152,7 @@ public class CASSerializer implements Se
public void addCAS(CASImpl cas, boolean addMetaData) {
BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the cas
- scanAllFSsForBinarySerialization(bcsd, null, csds); // populates the arrays
+ bcsd.scanAllFSsForBinarySerialization(null, csds); // no mark
this.fsIndex = bcsd.getIndexedFSs(csds.fs2addr); // must follow scanAll...
if (addMetaData) {
@@ -261,7 +273,7 @@ public class CASSerializer implements Se
final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
final CommonSerDesSequential csds = BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the cas, used for delta
- scanAllFSsForBinarySerialization(bcsd, null, csds); // populates the arrays
+ bcsd.scanAllFSsForBinarySerialization(null, csds); // no mark
try {
@@ -445,8 +457,47 @@ public class CASSerializer implements Se
// if (csds.getHeapEnd() == 0) {
// System.out.println("debug");
// }
- scanAllFSsForBinarySerialization(bcsd, mark, csds); // populates the arrays
-
+ final Obj2IntIdentityHashMap<TOP> fs2auxOffset = new Obj2IntIdentityHashMap<TOP>(TOP.class, TOP._singleton);
+
+ List<TOP> all = bcsd.scanAllFSsForBinarySerialization(mark, csds);
+
+ int byteOffset = 1;
+ int shortOffset = 1;
+ int longOffset = 1;
+
+ // scan all below mark and set up maps from aux array FSs to the offset to where the array starts in the modelled aux heap
+ for (TOP fs : all) {
+ if (trackingMark.isNew(fs)) {
+ break;
+ }
+ if (fs instanceof CommonArray) {
+ CommonArray ca = (CommonArray) fs;
+ SlotKind kind = fs._getTypeImpl().getComponentSlotKind();
+ switch (kind) {
+ case Slot_BooleanRef:
+ case Slot_ByteRef :
+ fs2auxOffset.put(fs, byteOffset);
+ byteOffset += ca.size();
+ break;
+ case Slot_ShortRef:
+ fs2auxOffset.put(fs, shortOffset);
+ shortOffset += ca.size();
+ break;
+ case Slot_LongRef:
+ case Slot_DoubleRef:
+ fs2auxOffset.put(fs, longOffset);
+ longOffset += ca.size();
+ break;
+ default:
+ } // end of switch
+ } // end of if commonarray
+ else { // fs has feature slots
+ // model long and double refs which use up the long aux heap for 1 cell
+ TypeImpl ti = fs._getTypeImpl();
+ longOffset += ti.getNbrOfLongOrDoubleFeatures();
+ }
+ } // end of for
+
try {
DataOutputStream dos = new DataOutputStream(ostream);
@@ -475,24 +526,14 @@ public class CASSerializer implements Se
// we do this before the strings or aux arrays are written out, because this
// could make additions to those.
- // addresses are in terms of modeled v2 arrays
- IntVector chgMainHeapAddr = new IntVector();
- IntVector chgMainHeapValues = new IntVector();
-
- IntVector chgByteAddr = new IntVector();
- ByteHeap chgByteValues = new ByteHeap();
-
- IntVector chgShortAddr = new IntVector();
- ShortHeap chgShortValues = new ShortHeap();
-
- IntVector chgLongAddr = new IntVector();
- LongHeap chgLongValues = new LongHeap();
+ // addresses are in terms of modeled v2 arrays, as absolute addr in the aux arrays, and values
+ List<AddrPlusValue> chgMainAvs = new ArrayList<>();
+ List<AddrPlusValue> chgByteAvs = new ArrayList<>();
+ List<AddrPlusValue> chgShortAvs = new ArrayList<>();
+ List<AddrPlusValue> chgLongAvs = new ArrayList<>();
- scanModifications(bcsd, csds, cas.getModifiedFSList(),
- chgMainHeapAddr, chgMainHeapValues,
- chgByteAddr, chgByteValues,
- chgShortAddr, chgShortValues,
- chgLongAddr, chgLongValues);
+ scanModifications(bcsd, csds, cas.getModifiedFSList(), fs2auxOffset,
+ chgMainAvs, chgByteAvs, chgShortAvs, chgLongAvs);
// output the new strings
StringHeapDeserializationHelper shdh = bcsd.stringHeap.serialize(1);
@@ -503,11 +544,11 @@ public class CASSerializer implements Se
// this is output in a way that is the total number of slots changed ==
// the sum over all fsChanges of
// for each fsChange, the number of slots (heap-sited-array or feature) modified
- final int modHeapSize = chgMainHeapAddr.size();
+ final int modHeapSize = chgMainAvs.size();
dos.writeInt(modHeapSize); //num modified
- for (int i = 0; i < modHeapSize; i++) {
- dos.writeInt(chgMainHeapAddr .get(i));
- dos.writeInt(chgMainHeapValues.get(i));
+ for (AddrPlusValue av : chgMainAvs) {
+ dos.writeInt(av.addr);
+ dos.writeInt((int)av.value);
}
// output the index FSs
@@ -547,24 +588,24 @@ public class CASSerializer implements Se
}
// 8bit heap modified cells
- writeMods(chgByteAddr, dos, i -> dos.writeByte(chgByteValues.heap[i]));
+ writeMods(chgByteAvs, dos, av -> dos.writeByte((byte)av.value));
// word alignment
- align = (4 - (chgByteAddr.size() % 4)) % 4;
+ align = (4 - (chgByteAvs.size() % 4)) % 4;
for (int i = 0; i < align; i++) {
dos.writeByte(0);
}
// 16bit heap modified cells
- writeMods(chgShortAddr, dos, i -> dos.writeShort(chgShortValues.heap[i]));
+ writeMods(chgShortAvs, dos, av -> dos.writeShort((short)av.value));
// word alignment
- if (chgShortAddr.size() % 2 != 0) {
+ if (chgShortAvs.size() % 2 != 0) {
dos.writeShort(0);
}
// 64bit heap modified cells
- writeMods(chgLongAddr, dos, i -> dos.writeLong(chgLongValues.heap[i]));
+ writeMods(chgLongAvs, dos, av -> dos.writeLong(av.value));
} catch (IOException e) {
throw new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION, e.getMessage());
@@ -572,30 +613,29 @@ public class CASSerializer implements Se
}
- private void writeMods(IntVector chgAddr, DataOutputStream dos, IntConsumer_withIOException writeValue) throws IOException {
- int size = chgAddr.size();
+ private void writeMods(
+ List<AddrPlusValue> avs,
+ DataOutputStream dos,
+ Consumer_T_withIOException<AddrPlusValue> writeValue) throws IOException {
+ int size = avs.size();
dos.writeInt(size);
- for (int i = 0; i < size; i++) {
- dos.writeInt(chgAddr.get(i));
+ for (AddrPlusValue av : avs) {
+ dos.writeInt(av.addr);
}
- for (int i = 1; i <= size; i++) { // <= because start loop at 1
- writeValue.accept(i);
- // example
- // dos.writeLong(chgLongValues.heap[i]);
+ for (AddrPlusValue av : avs) {
+ writeValue.accept(av);
}
}
/**
* The offset in the modeled heaps:
- * For aux arrays:
- * fs mapto mainHeapAddr, fetch root of aux array from slot + 2, add index
- * For main heap arrays:
- * fs mapto mainHeapAddr, slot + 2 + index
* @param index the 0-based index into the array
* @param fs the feature structure representing the array
* @return the addr into an aux array or main heap
*/
- private static int convertArrayIndexToAuxHeapAddr(BinaryCasSerDes bcsd, int index, TOP fs, Obj2IntIdentityHashMap<TOP> fs2addr) {
- return bcsd.heap.heap[fs2addr.get(fs) + 2] + index;
+ private static int convertArrayIndexToAuxHeapAddr(BinaryCasSerDes bcsd, int index, TOP fs, Obj2IntIdentityHashMap<TOP> fs2auxOffset) {
+ int offset = fs2auxOffset.get(fs);
+ assert offset > 0;
+ return offset + index; // offset is where the array starts in the modeled aux heap; index selects the modified element
}
private static int convertArrayIndexToMainHeapAddr(int index, TOP fs, Obj2IntIdentityHashMap<TOP> fs2addr) {
@@ -609,18 +649,22 @@ public class CASSerializer implements Se
*
* A prescan approach is needed in order to write the number of modifications preceding the
* write of the values (which unfortunately were written to the same stream in V2).
- * @param bcsd -
- * @param cas -
- * @param chgMainHeapAdd -
- * @param chgByteAddr -
- * @param chgShortAddr -
- * @param chgLongAddr -
+ * @param bcsd holds the model needed for v2 aux arrays
+ * @param csds supplies the fs2addr map used to compute main heap addresses
+ * @param fssModified the changes to scan; the caller passes the CAS's modified-FS list
+ * @param fs2auxOffset map from aux array FSs below the mark to where each array starts in the modeled aux heap
+ * @param chgMainAvs ordered list receiving the changed addresses and corresponding values for the main heap
+ * @param chgByteAvs ordered list receiving the changed addresses and corresponding values for the aux byte heap
+ * @param chgShortAvs ordered list receiving the changed addresses and corresponding values for the aux short heap
+ * @param chgLongAvs ordered list receiving the changed addresses and corresponding values for the aux long heap
*/
- static void scanModifications(BinaryCasSerDes bcsd, CommonSerDesSequential csds, FsChange[] fssModified,
- IntVector chgMainHeapAddr, IntVector chgMainHeapValue,
- IntVector chgByteAddr, ByteHeap chgByteValue,
- IntVector chgShortAddr, ShortHeap chgShortValue,
- IntVector chgLongAddr, LongHeap chgLongValue) {
+ static void scanModifications(BinaryCasSerDes bcsd, CommonSerDesSequential csds, FsChange[] fssModified,
+ Obj2IntIdentityHashMap<TOP> fs2auxOffset,
+ List<AddrPlusValue> chgMainAvs,
+ List<AddrPlusValue> chgByteAvs,
+ List<AddrPlusValue> chgShortAvs,
+ List<AddrPlusValue> chgLongAvs
+ ) {
// scan the sorted mods to precompute the various change items:
// changed main heap: addr and new slot value
@@ -635,67 +679,67 @@ public class CASSerializer implements Se
if (fsChange.arrayUpdates != null) {
switch(type.getComponentSlotKind()) {
- case Slot_Boolean:
+ case Slot_BooleanRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgByteAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
- chgByteValue.addByte((((BooleanArray)fs).get(index) ? (byte)1 : (byte)0));
+ chgByteAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset),
+ ((BooleanArray)fs).get(index) ? 1 : 0));
});
break;
- case Slot_Byte:
+ case Slot_ByteRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgByteAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
- chgByteValue.addByte(((ByteArray)fs).get(index));
+ chgByteAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset),
+ ((ByteArray)fs).get(index)));
});
break;
- case Slot_Short:
+ case Slot_ShortRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgShortAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
- chgShortValue.addShort(((ShortArray)fs).get(index));
+ chgShortAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset),
+ ((ShortArray)fs).get(index)));
});
break;
case Slot_LongRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgLongAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
- chgLongValue.addLong(((LongArray)fs).get(index));
+ chgLongAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset),
+ ((LongArray)fs).get(index)));
});
break;
case Slot_DoubleRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgLongAddr.add(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2addr));
- chgLongValue.addLong(CASImpl.double2long(((DoubleArray)fs).get(index)));
+ chgLongAvs.add(new AddrPlusValue(convertArrayIndexToAuxHeapAddr(bcsd, index, fs, fs2auxOffset),
+ CASImpl.double2long(((DoubleArray)fs).get(index))));
});
break;
// heap stored arrays
case Slot_Int:
fsChange.arrayUpdates.forAllInts(index -> {
- chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
- chgMainHeapValue.add(((IntegerArray)fs).get(index));
+ chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr),
+ ((IntegerArray)fs).get(index)));
});
break;
+
case Slot_Float:
fsChange.arrayUpdates.forAllInts(index -> {
- chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
- chgMainHeapValue.add(CASImpl.float2int(((FloatArray)fs).get(index)));
+ chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr),
+ CASImpl.float2int(((FloatArray)fs).get(index))));
});
break;
+
case Slot_StrRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
int v = bcsd.nextStringHeapAddrAfterMark + bcsd.stringHeap.addString(((StringArray)fs).get(index));
- chgMainHeapValue.add(v);
+ chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr), v));
});
break;
case Slot_HeapRef:
fsChange.arrayUpdates.forAllInts(index -> {
- chgMainHeapAddr.add(convertArrayIndexToMainHeapAddr(index, fs, fs2addr));
TOP tgtFs = ((FSArray)fs).get(index);
- chgMainHeapValue.add(fs2addr.get(tgtFs));
+ chgMainAvs.add(new AddrPlusValue(convertArrayIndexToMainHeapAddr(index, fs, fs2addr), fs2addr.get(tgtFs)));
});
break;
@@ -705,35 +749,36 @@ public class CASSerializer implements Se
BitSet fm = fsChange.featuresModified;
int offset = fm.nextSetBit(0);
while (offset >= 0) {
- chgMainHeapAddr.add(csds.fs2addr.get(fs) + offset + 1); // skip over type code);
+ int addr = csds.fs2addr.get(fs) + offset + 1; // skip over type code);
+ int value = 0;
+
FeatureImpl feat = type.getFeatureImpls()[offset];
switch (feat.getSlotKind()) {
- case Slot_Boolean: chgMainHeapValue.add(fs._getBooleanValueNc(feat) ? 1 : 0); break;
+ case Slot_Boolean: value = fs._getBooleanValueNc(feat) ? 1 : 0; break;
- case Slot_Byte: chgMainHeapValue.add(fs._getByteValueNc(feat)); break;
- case Slot_Short: chgMainHeapValue.add(fs._getShortValueNc(feat)); break;
- case Slot_Int: chgMainHeapValue.add(fs._getIntValueNc(feat)); break;
- case Slot_Float: chgMainHeapValue.add(CASImpl.float2int(fs._getFloatValueNc(feat))); break;
+ case Slot_Byte: value = fs._getByteValueNc(feat); break;
+ case Slot_Short: value = fs._getShortValueNc(feat); break;
+ case Slot_Int: value = fs._getIntValueNc(feat); break;
+ case Slot_Float: value = CASImpl.float2int(fs._getFloatValueNc(feat)); break;
case Slot_LongRef: {
- int v = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(fs._getLongValueNc(feat));
- chgMainHeapValue.add(v);
+ value = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(fs._getLongValueNc(feat));
break;
}
case Slot_DoubleRef: {
- int v = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(CASImpl.double2long(fs._getDoubleValueNc(feat)));
- chgMainHeapValue.add(v);
+ value = bcsd.nextLongHeapAddrAfterMark + bcsd.longHeap.addLong(CASImpl.double2long(fs._getDoubleValueNc(feat)));
break;
}
case Slot_StrRef: {
- int v = bcsd.nextStringHeapAddrAfterMark + bcsd.stringHeap.addString(fs._getStringValueNc(feat));
- chgMainHeapValue.add(v);
+ value = bcsd.nextStringHeapAddrAfterMark + bcsd.stringHeap.addString(fs._getStringValueNc(feat));
break;
}
- case Slot_HeapRef: chgMainHeapValue.add(fs2addr.get(fs._getFeatureValueNc(feat))); break;
+ case Slot_HeapRef: value = fs2addr.get(fs._getFeatureValueNc(feat)); break;
default: Misc.internalError();
} // end of switch
+ chgMainAvs.add(new AddrPlusValue(addr, value));
+
offset = fm.nextSetBit(offset + 1);
} // loop over changed feature offsets
} // end of features-modified case
@@ -794,18 +839,7 @@ public class CASSerializer implements Se
long[] getLongArray() {
return this.longHeapArray;
}
-
- /**
- * For delta serialization,
- * - scans all FSs to compute addr2fs and fs2addr tables
- * - scans new FSs to compute delta heap, aux heap, and strings to serialize
- * @param bcsd -
- * @param mark null or the mark if delta serialization
- */
- private void scanAllFSsForBinarySerialization(BinaryCasSerDes bcsd, MarkerImpl mark, CommonSerDesSequential csds) {
- bcsd.scanAllFSsForBinarySerialization(mark, csds);
- }
-
+
private void copyHeapsToArrays(BinaryCasSerDes bcsd) {
this.heapArray = bcsd.heap.toArray();
this.byteHeapArray = bcsd.byteHeap.toArray();
Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java?rev=1759710&r1=1759709&r2=1759710&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDesSequential.java Wed Sep 7 21:33:09 2016
@@ -19,12 +19,10 @@
package org.apache.uima.cas.impl;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
-import org.apache.uima.cas.function.Consumer_T_withIOException;
import org.apache.uima.internal.util.Int2ObjHashMap;
import org.apache.uima.internal.util.Misc;
import org.apache.uima.internal.util.Obj2IntIdentityHashMap;
@@ -79,10 +77,9 @@ public class CommonSerDesSequential {
/**
* The FSs in this list are not necessarily sequential, but is in ascending (simulated heap) order,
* needed for V2 compatibility of serialized forms.
- * This is populated from the main CAS's id-to-fs map, which is accessed once;
- * Subsequent accessing of that could return different lists due to an intervening Garbage Collection.
+ * This is populated either during deserialization, or for serialization, from indexed + reachable.
*
- * Before accessing this, any pending items must be merged.
+ * Before accessing this, any pending items must be merged (sorting done lazily)
*/
final private List<TOP> sortedFSs = new ArrayList<>(); // holds the FSs sorted by id
@@ -137,24 +134,39 @@ public class CommonSerDesSequential {
heapEnd = 0;
}
- void setup(MarkerImpl mark, int fromAddr) {
+ /**
+ * Scan all indexed + reachable FSs, sorted, and
+ * - create two maps from those to/from the int offsets in the simulated main heap
+ * - add all the (filtered - above the mark) FSs to the sortedFSs
+ * - set the heapEnd
+ * @param mark null or the mark
+ * @param fromAddr often 1 but sometimes the mark next fsid
+ * @return all (not filtered) FSs sorted
+ */
+ List<TOP> setup(MarkerImpl mark, int fromAddr) {
if (mark == null) {
clear();
}
// local value as "final" to permit use in lambda below
- final int[] nextAddr = {fromAddr};
+ int nextAddr = fromAddr;
if (TRACE_SETUP) System.out.println("Cmn serDes sequential setup called by: " + Misc.getCaller());
- List<TOP> allAboveMark = baseCas.walkReachablePlusFSsSorted(fs -> {
- addFS1(fs, nextAddr[0]);
- if (TRACE_SETUP) {
- System.out.format("Cmn serDes sequential setup: add FS id: %,4d addr: %,5d type: %s%n", fs._id, nextAddr[0], fs._getTypeImpl().getShortName());
- }
- nextAddr[0] += BinaryCasSerDes.getFsSpaceReq(fs, fs._getTypeImpl());
- }, mark, null, null);
+ List<TOP> all = new AllFSs(baseCas).getAllFSsSorted();
+ List<TOP> filtered = CASImpl.filterAboveMark(all, mark);
+ for (TOP fs : filtered) {
+ addFS1(fs, nextAddr); // doesn't update sortedFSs, that will be done below in batch
+ if (TRACE_SETUP) {
+ System.out.format("Cmn serDes sequential setup: add FS id: %,4d addr: %,5d type: %s%n",
+ Integer.valueOf(fs._id),
+ Integer.valueOf(nextAddr),
+ fs._getTypeImpl().getShortName());
+ }
+ nextAddr += BinaryCasSerDes.getFsSpaceReq(fs, fs._getTypeImpl());
+ }
- sortedFSs.addAll(allAboveMark);
- heapEnd = nextAddr[0];
+ sortedFSs.addAll(filtered);
+ heapEnd = nextAddr;
+ return all;
// if (heapEnd == 0) {
// System.out.println("debug");
// }
@@ -172,6 +184,9 @@ public class CommonSerDesSequential {
// }
// }
//
+ /**
+ * @return sorted FSs above mark if mark set, otherwise all, sorted
+ */
List<TOP> getSortedFSs() {
if (pending.size() != 0) {
merge();