You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2013/01/22 23:19:35 UTC
svn commit: r1437217 - in
/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl:
BinaryCasSerDes4.java CASImpl.java CasTypeSystemMapper.java
TypeSystemImpl.java
Author: schor
Date: Tue Jan 22 22:19:34 2013
New Revision: 1437217
URL: http://svn.apache.org/viewvc?rev=1437217&view=rev
Log:
[UIMA-2498] prior to adding mapping, fix up the design of the APIs for compression. Add missing Mapper class.
Added:
uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java (with props)
Modified:
uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java?rev=1437217&r1=1437216&r2=1437217&view=diff
==============================================================================
--- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java (original)
+++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java Tue Jan 22 22:19:34 2013
@@ -132,12 +132,14 @@ import org.apache.uima.util.impl.Seriali
* in identity-map cache (size limit = 10?) - key is target typesystemimpl.
* Defaulting:
* flags: doMeasurement, compressLevel, CompressStrategy
+ * Defaulting set in call to create instance of this class
* Per serialize call: cas, output, [target ts], [mark for delta]
* Per deserialize call: cas, input, [target ts]
*
- * This class has static methods with all args
- * CASImpl has instance method with defaulting args
- *
+ * CASImpl has instance method with defaulting args for serialization.
+ * CASImpl has reinit which works with compressed binary serialization objects
+ * if no type mapping
+ * If type mapping, (new BinaryCasSerDes4(sourceTypeSystem)).deserialize(in-stream, targetTypeSystem)
*/
public class BinaryCasSerDes4 {
@@ -315,28 +317,35 @@ public class BinaryCasSerDes4 {
*/
final private TypeSystemImpl ts;
final private boolean doMeasurements;
-
+ final private CompressLevel compressLevel;
+ final private CompressStrat compressStrategy;
/**
*
- * @param ts
- * @param doMeasurements - normally set this to false.
+ * @param ts Type System (the source type system)
+ * @param doMeasurements true if measurements should be collected
+ * @param compressLevel
+ * @param compressStrategy
*/
- public BinaryCasSerDes4(TypeSystemImpl ts, boolean doMeasurements) {
+ public BinaryCasSerDes4(TypeSystemImpl ts, boolean doMeasurements,
+ CompressLevel compressLevel, CompressStrat compressStrategy) {
this.ts = ts;
this.doMeasurements = doMeasurements;
-
+ this.compressLevel = compressLevel;
+ this.compressStrategy = compressStrategy;
+ }
+
+ public BinaryCasSerDes4(TypeSystemImpl ts) {
+ this(ts, false, CompressLevel.Default, CompressStrat.Default);
}
/**
- *
* @param cas
* @param out
* @param trackingMark
* @return null or serialization measurements (depending on setting of doMeasurements)
* @throws IOException
*/
- public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark,
- CompressLevel compressLevel, CompressStrat compressStrategy) throws IOException {
+ public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark) throws IOException {
SerializationMeasures sm = (doMeasurements) ? new SerializationMeasures() : null;
CASImpl casImpl = (CASImpl) ((cas instanceof JCas) ? ((JCas)cas).getCas(): cas);
if (null != trackingMark && !trackingMark.isValid() ) {
@@ -345,26 +354,44 @@ public class BinaryCasSerDes4 {
}
Serializer serializer = new Serializer(
- casImpl, makeDataOutputStream(out), (MarkerImpl) trackingMark, sm,
- compressLevel, compressStrategy);
+ casImpl, makeDataOutputStream(out), (MarkerImpl) trackingMark, sm);
serializer.serialize();
return sm;
}
-
- public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark,
- CompressLevel compressLevel) throws IOException {
- return serialize(cas, out,trackingMark, compressLevel, CompressStrat.Default);
- }
-
- public SerializationMeasures serialize(AbstractCas cas, Object out, Marker trackingMark) throws IOException {
- return serialize(cas, out,trackingMark, CompressLevel.Default, CompressStrat.Default);
- }
public SerializationMeasures serialize(AbstractCas cas, Object out) throws IOException {
return serialize(cas, out, null);
}
+ public void deserialize(CASImpl cas, InputStream istream) throws IOException {
+ final DataInputStream dis = (istream instanceof DataInputStream) ?
+ (DataInputStream) istream : new DataInputStream(istream);
+
+ // key
+ // determine whether a byte swap is needed, based on the key
+ byte[] bytebuf = new byte[4];
+ bytebuf[0] = dis.readByte(); // U
+ bytebuf[1] = dis.readByte(); // I
+ bytebuf[2] = dis.readByte(); // M
+ bytebuf[3] = dis.readByte(); // A
+
+ // version
+ // version bit in 2's place indicates this is in delta format.
+ final int version = dis.readInt();
+ final boolean delta = ((version & 2) == 2);
+
+ cas = cas.getBaseCAS();
+ if (!delta) {
+ cas.resetNoQuestions();
+ }
+
+ if (0 == (version & 4)) {
+ throw new RuntimeException("non-compressed invalid object passed to BinaryCasSerDes4 deserialize");
+ }
+ deserialize(cas, istream, delta);
+ }
+
public void deserialize(CASImpl cas, InputStream deserIn, boolean isDelta) throws IOException {
DataInput in;
if (deserIn instanceof DataInputStream) {
@@ -406,8 +433,6 @@ public class BinaryCasSerDes4 {
final private Integer[] serializedTypeCode2Code = new Integer[ts.getTypeArraySize()]; // needs to be Integer to get comparator choice
final private int[] estimatedZipSize = new int[NBR_SLOT_KIND_ZIP_STREAMS]; // one entry for each output stream kind
final private OptimizeStrings os;
- final private CompressLevel compressLevel;
- final private CompressStrat compressStrategy;
// typeInfo is local to this serialization instance to permit multiple threads
private TypeInfo typeInfo; // type info for the current type being serialized
@@ -440,15 +465,11 @@ public class BinaryCasSerDes4 {
final private DataOutputStream strSeg_dos;
private Serializer(CASImpl cas, DataOutputStream serializedOut, MarkerImpl mark,
- SerializationMeasures sm,
- CompressLevel compressLevel,
- CompressStrat compressStrategy) {
+ SerializationMeasures sm) {
this.cas = cas;
this.serializedOut = serializedOut;
this.mark = mark;
this.sm = sm;
- this.compressLevel = compressLevel;
- this.compressStrategy = compressStrategy;
isDelta = (mark != null);
doMeasurement = (sm != null);
Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java?rev=1437217&r1=1437216&r2=1437217&view=diff
==============================================================================
--- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java (original)
+++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java Tue Jan 22 22:19:34 2013
@@ -244,10 +244,6 @@ public class CASImpl extends AbstractCas
*/
private List<Marker> trackingMarkList;
- // must be in svd part because has a field that is updated
- // while serializing
- private BinaryCasSerDes4 binaryCompressor;
-
private SharedViewData(boolean useFSCache) {
this.useFSCache = useFSCache;
}
@@ -1193,10 +1189,7 @@ public class CASImpl extends AbstractCas
}
if (0 != (version & 4)) {
- if (svd.binaryCompressor == null) {
- svd.binaryCompressor = new BinaryCasSerDes4(this.getTypeSystemImpl(), false);
- }
- svd.binaryCompressor.deserialize(this, dis, delta);
+ (new BinaryCasSerDes4(this.getTypeSystemImpl())).deserialize(this, dis, delta);
return;
}
@@ -4294,10 +4287,7 @@ public class CASImpl extends AbstractCas
* @throws IOException
*/
public void serializeWithCompression(Object out) throws IOException {
- if (svd.binaryCompressor == null) {
- svd.binaryCompressor = new BinaryCasSerDes4(this.getTypeSystemImpl(), false);
- }
- svd.binaryCompressor.serialize(this, out);
+ (new BinaryCasSerDes4(this.getTypeSystemImpl())).serialize(this, out);
}
}
Added: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java?rev=1437217&view=auto
==============================================================================
--- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java (added)
+++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java Tue Jan 22 22:19:34 2013
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.cas.impl;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.uima.cas.Type;
+
+/**
+ * This class gets initialized with two type systems, and then provides
+ * resources to map type and feature codes between them.
+ *
+ * It is used by some binary serialization/deserialization
+ * code to allow non-exact matched type systems to send and
+ * receive CASes in a binary-like format.
+ *
+ * Use cases:
+ *
+ * Serializing: Source ts -> generate serialized form in Target ts
+ * Deserializing: Target ts -> generate deserialized form in Source ts
+ * - either from remote or
+ * - from disk-stored-form
+ *
+ * LifeCycle:
+ * Instances of this class are created for a CAS when needed, and then
+ * kept in the (source) TypeSystemImpl, in a map indexed by
+ * the target type system (identity map)
+ */
+
+public class CasTypeSystemMapper {
+ private final static int[] INT0 = new int[0];
+
+ public final TypeSystemImpl tsSrc; // source type system
+ public final TypeSystemImpl tsTgt; // target type system
+
+ /**
+ * Map from source type codes to target type codes.
+ * Source type code used as index,
+ * value is target type code
+ */
+ final private int[] tSrc2Tgt;
+
+ /**
+ * First index is src type code, 2nd index is src feature offset, 0 is 1st feature.
+ * Value is -1 if tgt doesn't have feature, else it is the feature offset in target.
+ * Only for type codes that are not arrays.
+ */
+ final private int[][] fSrc2Tgt;
+
+ /**
+ * First index is src type code, 2nd index is tgt feature offset, 0 is 1st feature.
+ * Value is -1 if src doesn't have feature, else it is the feature offset in source.
+ * Only used for type codes that are not arrays.
+ * Use: When serializing a source type that exists in the target, have to output
+ * the slots in the target feature order
+ * Also, when comparing the slots in the target with a given source
+ */
+ final private int[][] tgtFoffsets2Src;
+
+ /**
+ * Same as tSrc2Tgt, but reversed
+ * used when deserializing a target back into a source
+ */
+ final private int[] tTgt2Src;
+
+ public CasTypeSystemMapper(TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) {
+ if (!tsSrc.isCommitted() || !tsTgt.isCommitted()) {
+ throw new RuntimeException("Type Systems must be committed before calling this method");
+ }
+ this.tsSrc = tsSrc;
+ this.tsTgt = tsTgt;
+
+ this.tSrc2Tgt = addTypes(tsSrc, tsTgt);
+ this.tTgt2Src = addTypes(tsTgt, tsSrc);
+ this.fSrc2Tgt = new int[tsSrc.getTypeArraySize()] [];
+ this.tgtFoffsets2Src = new int[tsSrc.getTypeArraySize()] [];
+ addFeatures(tsSrc, tsTgt);
+// this.fTgt2Src = addFeatures(tsTgt, tsSrc);
+ }
+
+ // returns 0 if type doesn't have corresponding code in other type system
+ public int mapTypeCodeSrc2Tgt(int c) {
+ return tSrc2Tgt[c];
+ }
+
+ // returns 0 if type doesn't have corresponding code in other type system
+ public int mapTypeCodeTgt2Src(int c) {
+ return tTgt2Src[c];
+ }
+
+ public int[] getTgtFeatOffsets2Src(int tCode) {
+ return tgtFoffsets2Src[tCode];
+ }
+
+ // returns -1 if feature doesn't have corresponding code in other type system
+ public int mapFeatureOffsetSrc2Tgt(int tCode, int offset) {
+ return fSrc2Tgt[tCode][offset];
+ }
+
+ // returns 0 if feature doesn't have corresponding code in other type system
+// public int mapFeatureCodeTgt2Src(int c) {
+// return fTgt2Src[c];
+// }
+
+ private int[] addTypes(TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) {
+ Map<TypeImpl, TypeImpl> mSrc2Tgt = new LinkedHashMap<TypeImpl, TypeImpl>();
+ for (Iterator<Type> it = tsSrc.getTypeIterator(); it.hasNext();) {
+ TypeImpl tSrc = (TypeImpl) it.next();
+ TypeImpl tTgt = (TypeImpl) tsTgt.getType(tSrc.getName());
+ if (tTgt != null) {
+ mSrc2Tgt.put(tSrc, tTgt);
+ }
+ }
+ int[] r = new int[tsSrc.getNumberOfTypes() + 1]; // type codes are numbered starting with 1
+ for (Entry<TypeImpl, TypeImpl> e : mSrc2Tgt.entrySet()) {
+ r[e.getKey().getCode()] = e.getValue().getCode();
+ }
+ return r;
+ }
+
+ private void addFeatures(TypeSystemImpl tsSrc, TypeSystemImpl tsTgt) {
+ for (int tCodeSrc = 0; tCodeSrc < tsSrc.getTypeArraySize(); tCodeSrc++) {
+ final int tCodeTgt = mapTypeCodeSrc2Tgt(tCodeSrc);
+ if (tCodeTgt == 0) { // this type not in target
+ fSrc2Tgt[tCodeSrc] = INT0;
+ tgtFoffsets2Src[tCodeSrc] = null; // should never be referenced
+ continue;
+ }
+
+ // type is part of target ts
+ final int[] fcSrc = tsSrc.ll_getAppropriateFeatures(tCodeSrc);
+ final int[] fcTgt = tsTgt.ll_getAppropriateFeatures(tCodeTgt);
+
+ if (fcSrc.length == 0) {
+ // source has no features
+ fSrc2Tgt[tCodeSrc] = INT0;
+ tgtFoffsets2Src[tCodeSrc] = new int[fcTgt.length];
+ Arrays.fill(tgtFoffsets2Src[tCodeSrc], -1);
+ continue; // source type has no features
+ }
+
+ final int[] src2tgtOffsets = new int[fcSrc.length];
+ fSrc2Tgt[tCodeSrc] = src2tgtOffsets;
+
+ if (fcTgt.length == 0) {
+ Arrays.fill(src2tgtOffsets, -1);
+ tgtFoffsets2Src[tCodeSrc] = INT0;
+ continue; // target type has no features
+ }
+ final int[] tgt2srcOffsets = new int[fcTgt.length];
+ tgtFoffsets2Src[tCodeSrc] = tgt2srcOffsets;
+
+// // debug
+// if (tCodeTgt == 228) {
+// String ss[] = new String[fcTgt.length];
+// for (int i = 0; i < fcTgt.length; i++) {
+// ss[i] = tsTgt.ll_getFeatureForCode(fcTgt[i]).getName();
+// }
+// System.out.print("");
+// }
+// // debug - verify features are in alpha order
+// String ss[] = new String[fcSrc.length];
+// String prev = " ";
+// boolean fault = false;
+// for (int i = 0; i < fcSrc.length; i++) {
+// String s = tsSrc.ll_getFeatureForCode(fcSrc[i]).getName();
+// ss[i] = s;
+// if (prev.compareTo(s) >= 0) {
+// fault = true;
+// System.out.format("Source feature names not sorted, prev = %s, this = %s%n", prev, s);
+// }
+// prev = s;
+// }
+// if (fault) {
+// System.out.print("");
+// }
+// prev = " ";
+// if (tCodeTgt == 228) {
+//
+// for (int i = 0; i < fcTgt.length; i++) {
+// String s = tsTgt.ll_getFeatureForCode(fcTgt[i]).getName();
+// if (prev.compareTo(s) >= 0) {
+// fault = true;
+// System.out.format("Target feature names not sorted, prev = %s, this = %s%n", prev, s);
+// }
+// prev = s;
+// }
+// }
+
+ // get List of names of appropriate features in the target for this type
+ List<String> namesTgt = new ArrayList<String>(fcTgt.length);
+ for (int i = 0; i < fcTgt.length; i++) {
+ namesTgt.add(tsTgt.ll_getFeatureForCode(fcTgt[i]).getName());
+ }
+
+ // get List of names of appropriate features in the source for this type
+ List<String> namesSrc = new ArrayList<String>(fcSrc.length);
+ for (int i = 0; i < fcSrc.length; i++) {
+ namesSrc.add(tsSrc.ll_getFeatureForCode(fcSrc[i]).getName());
+ }
+
+
+ // for each feature in the source, find the corresponding target feature by name match (if any)
+ for (int fciSrc = 0; fciSrc < fcSrc.length; fciSrc++) {
+ final String nameSrc = namesSrc.get(fciSrc);
+ // feature names are semi sorted, not completely sorted due to inheritance
+ final int iTgt = namesTgt.indexOf(nameSrc);
+ src2tgtOffsets[fciSrc] = iTgt; // -1 if not there
+ } // end of for loop over all source features of a type code
+
+ // for each feature in the target, find the corresponding source feature by name match (if any)
+ for (int fciTgt = 0; fciTgt < fcTgt.length; fciTgt++) {
+ final String nameTgt = namesTgt.get(fciTgt);
+ // feature names are semi sorted, not completely sorted due to inheritance
+ final int iSrc = namesSrc.indexOf(nameTgt);
+ tgt2srcOffsets[fciTgt] = iSrc; // -1 if not there
+ } // end of for loop over all target features of a type code
+
+
+ } // end of for loop over all typecodes
+ }
+
+
+}
Propchange: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CasTypeSystemMapper.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java?rev=1437217&r1=1437216&r2=1437217&view=diff
==============================================================================
--- uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java (original)
+++ uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/TypeSystemImpl.java Tue Jan 22 22:19:34 2013
@@ -1558,6 +1558,9 @@ public class TypeSystemImpl implements T
Map<TypeSystemImpl, CasTypeSystemMapper> typeSystemMappers = new HashMap<TypeSystemImpl, CasTypeSystemMapper>();
synchronized CasTypeSystemMapper getTypeSystemMapper(TypeSystemImpl tgtTs) {
+ if ((null == tgtTs) || (this == tgtTs)) {
+ return null; // conventions for no type mapping
+ }
CasTypeSystemMapper m = typeSystemMappers.get(tgtTs);
if (null == m) {
m = new CasTypeSystemMapper(this, tgtTs);