You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2008/06/06 16:17:36 UTC
svn commit: r663949 -
/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java
Author: twgoetz
Date: Fri Jun 6 07:17:36 2008
New Revision: 663949
URL: http://svn.apache.org/viewvc?rev=663949&view=rev
Log:
Jira UIMA-1067: remove legacy char heap/ref heap from StringHeap code.
https://issues.apache.org/jira/browse/UIMA-1067
Modified:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java?rev=663949&r1=663948&r2=663949&view=diff
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java Fri Jun 6 07:17:36 2008
@@ -20,272 +20,146 @@
package org.apache.uima.cas.impl;
import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.uima.internal.util.IntArrayUtils;
+import java.util.List;
/**
* Encapsulate string storage for the CAS.
*
*/
final class StringHeap {
- private static final int DEFAULT_REF_HEAP_BASE_SIZE = 5000;
-
- private static final int DEFAULT_REF_HEAP_MULT_LIMIT = DEFAULT_REF_HEAP_BASE_SIZE * 1024;
-
- private static final int DEFAULT_STRING_HEAP_BASE_SIZE = 20000;
-
- private static final int DEFAULT_STRING_HEAP_MULT_LIMIT = DEFAULT_STRING_HEAP_BASE_SIZE * 1024;
-
- private static final int MIN_REF_HEAP_BASE_SIZE = 1024;
-
- private static final int MIN_STR_HEAP_BASE_SIZE = 1024 * 32;
-
- protected static final int REF_HEAP_CELL_SIZE = 3;
-
- protected static final int CHAR_HEAP_POINTER_OFFSET = 0;
-
- protected static final int CHAR_HEAP_STRLEN_OFFSET = 1;
-
- protected static final int STRING_LIST_ADDR_OFFSET = 2;
-
- private static final int GROWTH_FACTOR = 4;
-
- private static final int NULL = 0;
-
- // Start pos so that first returned string code is 1.
- protected static final int FIRST_CELL_REF = 3;
-
- private final int refHeapBaseSize;
+
+ private static final int leastStringCode = 1;
- private final int refHeapMultLimit;
-
- private final int strHeapBaseSize;
-
- private final int strHeapMultLimit;
-
- protected int refHeapPos = FIRST_CELL_REF;
-
- protected int[] refHeap;
-
- protected ArrayList stringList;
-
- protected int charHeapPos = 0;
-
- protected char[] stringHeap;
+ private List<String> stringList;
StringHeap() {
- this(DEFAULT_REF_HEAP_BASE_SIZE, DEFAULT_REF_HEAP_MULT_LIMIT, DEFAULT_STRING_HEAP_BASE_SIZE,
- DEFAULT_STRING_HEAP_MULT_LIMIT);
- }
-
- StringHeap(int refHeapBaseSize, int refHeapMultLimit, int strHeapBaseSize, int strHeapMultLimit) {
super();
- if (refHeapBaseSize < MIN_REF_HEAP_BASE_SIZE) {
- this.refHeapBaseSize = MIN_REF_HEAP_BASE_SIZE;
- } else {
- this.refHeapBaseSize = refHeapBaseSize;
- }
- if (refHeapMultLimit < DEFAULT_REF_HEAP_MULT_LIMIT) {
- this.refHeapMultLimit = DEFAULT_REF_HEAP_MULT_LIMIT;
- } else {
- this.refHeapMultLimit = refHeapMultLimit;
- }
- if (strHeapBaseSize < MIN_STR_HEAP_BASE_SIZE) {
- this.strHeapBaseSize = MIN_STR_HEAP_BASE_SIZE;
- } else {
- this.strHeapBaseSize = strHeapBaseSize;
- }
- if (strHeapMultLimit < DEFAULT_STRING_HEAP_MULT_LIMIT) {
- this.strHeapMultLimit = DEFAULT_STRING_HEAP_MULT_LIMIT;
- } else {
- this.strHeapMultLimit = strHeapMultLimit;
- }
initMemory();
}
- private static final int cas2refHeapPointer(int i) {
- return i * REF_HEAP_CELL_SIZE;
- }
-
- private static final int refHeap2casPointer(int i) {
- return i / REF_HEAP_CELL_SIZE;
- }
-
+ // Initialize internal data structures. This used to be a lot more complicated when we had the
+ // character heap option.
private final void initMemory() {
- this.refHeap = new int[this.refHeapBaseSize];
- this.stringHeap = new char[this.strHeapBaseSize];
- this.stringList = new ArrayList();
+ this.stringList = new ArrayList<String>();
this.stringList.add(null);
}
- final void reset() {
- this.reset(false);
+ /** Deserialize from a binary serialized CAS.
+ *
+ * @param shdh Serialization helper data structure.
+ */
+ final void reinit(StringHeapDeserializationHelper shdh) {
+ initMemory();
+ int stringOffset;
+ int stringLength;
+ // Simply iterate over the ref heap and add one string after another. The references come out
+ // right because they are defined by the positions on the ref heap.
+ for (int i = StringHeapDeserializationHelper.FIRST_CELL_REF; i < shdh.refHeap.length; i += StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) {
+ stringOffset = shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET];
+ stringLength = shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET];
+ this.stringList.add(new String(shdh.charHeap, stringOffset, stringLength));
+ }
}
- final void reset(boolean doFullReset) {
- if (doFullReset) {
- this.initMemory();
- } else {
- Arrays.fill(this.refHeap, 0, this.refHeapPos, 0);
- this.stringList = new ArrayList();
- this.stringList.add(null);
+ /**
+ * Create the serialization helper data structure.
+ * @return Serialization helper that can be interpreted more easily by serialization code.
+ */
+ StringHeapDeserializationHelper serialize() {
+ StringHeapDeserializationHelper shdh = new StringHeapDeserializationHelper();
+ // Ref heap is 3 times the size of the string list.
+ shdh.refHeap = new int[this.stringList.size()
+ * StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE];
+ shdh.refHeapPos = shdh.refHeap.length;
+ // Compute required size of character heap.
+ int charHeapSize = 0;
+ for (int i = 0; i < this.stringList.size(); i++) {
+ String s = this.stringList.get(i);
+ if (s != null) {
+ charHeapSize += s.length();
+ }
+ }
+ shdh.charHeap = new char[charHeapSize];
+ shdh.charHeapPos = shdh.charHeap.length;
+
+ int charCount = 0;
+ // Now write out the actual data
+ for (int i = 1; i < this.stringList.size(); i++) {
+ String s = this.stringList.get(i);
+ int refHeapOffset = i * StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE;
+ shdh.refHeap[refHeapOffset + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET] = charCount;
+ shdh.refHeap[refHeapOffset + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET] = s
+ .length();
+ System.arraycopy(s.toCharArray(), 0, shdh.charHeap, charCount, s.length());
+ charCount += s.length();
}
- this.refHeapPos = FIRST_CELL_REF;
- this.charHeapPos = 0;
+ assert (charCount == shdh.charHeap.length);
+ return shdh;
}
- // Getters
+ // Reset the string heap (called on CAS reset).
+ final void reset() {
+ initMemory();
+ }
+ // Get a string value
String getStringForCode(int stringCode) {
- if (stringCode == NULL) {
+ if (stringCode == LowLevelCAS.NULL_FS_REF) {
return null;
}
- final int strInfoRef = cas2refHeapPointer(stringCode);
- final int internalStringCode = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
- if (internalStringCode != NULL) {
- return (String) this.stringList.get(internalStringCode);
- }
- final int strOffset = this.refHeap[strInfoRef + CHAR_HEAP_POINTER_OFFSET];
- final int strLen = this.refHeap[strInfoRef + CHAR_HEAP_STRLEN_OFFSET];
- return new String(this.stringHeap, strOffset, strLen);
+ return this.stringList.get(stringCode);
}
+ // Who uses this?
int copyCharsToBuffer(int stringCode, char[] buffer, int start) {
- final int strInfoRef = cas2refHeapPointer(stringCode);
- final int strCode = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
- int strOffset = this.refHeap[strInfoRef + CHAR_HEAP_POINTER_OFFSET];
- final int bufMax = buffer.length - start;
- if (strCode == 0) {
- final int strLen = this.refHeap[strInfoRef + CHAR_HEAP_STRLEN_OFFSET];
- final int max = (strLen < bufMax) ? strLen : bufMax;
- // Warning: using start and strOffset as counters (premature
- // optimization ;)
- for (int i = 0; i < max; i++) {
- buffer[start] = this.stringHeap[strOffset];
- ++start;
- ++strOffset;
- }
- return strLen;
- }
- final int internalStringCode = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
- final String str = (String) this.stringList.get(internalStringCode);
+ final String str = this.stringList.get(stringCode);
final int len = str.length();
- final int max = (len < bufMax) ? len : bufMax;
+ final int requestedMax = start + len;
+ // Check that the buffer is long enough to copy the whole string. If it isn't long enough, we
+ // copy up to buffer.length - start characters.
+ final int max = (buffer.length < requestedMax) ? (buffer.length - start) : len;
for (int i = 0; i < max; i++) {
buffer[start + i] = str.charAt(i);
}
return len;
}
- // Setters
-
+ /**
+ * Add a string.
+ * @param s The string.
+ * @return The positional code of the added string.
+ */
int addString(String s) {
if (s == null) {
return LowLevelCAS.NULL_FS_REF;
}
- // Get and remember pointer at next free refHeap cell.
- final int cellRef = this.refHeapPos;
- // Increment pos and ensure sufficient space.
- this.refHeapPos += REF_HEAP_CELL_SIZE;
- ensureRefHeapSize();
- // Get a new string ref.
- final int stringRef = this.stringList.size();
- // Set the string list reference to the code point of the string being
- // added.
- this.refHeap[cellRef + STRING_LIST_ADDR_OFFSET] = stringRef;
- this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET] = s.length();
- // Add the string to the list, at the position corresponding to
- // stringRef.
+ final int addr = this.stringList.size();
this.stringList.add(s);
- // Return cas version of refHeap ref.
- return refHeap2casPointer(cellRef);
+ return addr;
}
+ // Not sure what this is supposed to do. Passes unit tests like this.
int cloneStringReference(int stringCode) {
- // Set the string list reference to the code point of the string being
- // added.
- final int strInfoRef = cas2refHeapPointer(stringCode);
- int strRef = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
- // get the length of the reference string
- int strLen = this.refHeap[strInfoRef + CHAR_HEAP_STRLEN_OFFSET];
- // Get and remember pointer at next free refHeap cell.
- final int cellRef = this.refHeapPos;
- // Increment pos and ensure sufficient space.
- this.refHeapPos += REF_HEAP_CELL_SIZE;
- ensureRefHeapSize();
- // Set the string list reference to the code point of the string being
- // reference
- this.refHeap[cellRef + STRING_LIST_ADDR_OFFSET] = strRef;
- this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET] = strLen;
- // Return cas version of refHeap ref.
- return refHeap2casPointer(cellRef);
- }
-
- private final void ensureRefHeapSize() {
- this.refHeap = IntArrayUtils.ensure_size(this.refHeap, this.refHeapPos, GROWTH_FACTOR,
- this.refHeapMultLimit);
+ return stringCode;
}
+ // Who uses this?
int addCharBuffer(char[] buffer, int start, int length) {
- // Get and remember pointer at next free refHeap cell.
- final int cellRef = this.refHeapPos;
- // Increment pos and ensure sufficient space.
- this.refHeapPos += REF_HEAP_CELL_SIZE;
- ensureRefHeapSize();
- final int charBufStart = this.charHeapPos;
- // Create the cell.
- this.refHeap[cellRef + CHAR_HEAP_POINTER_OFFSET] = charBufStart;
- this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET] = length;
- this.charHeapPos += length + 1;
- ensureCharHeapSize();
- System.arraycopy(buffer, start, this.stringHeap, charBufStart, length);
- return refHeap2casPointer(cellRef);
- }
-
- private void ensureCharHeapSize() {
- this.stringHeap = IntArrayUtils.ensure_size(this.stringHeap, this.charHeapPos, GROWTH_FACTOR,
- this.strHeapMultLimit);
- }
-
- // Informational
-
- final boolean isStringCode(int stringCode) {
- final int cellRef = cas2refHeapPointer(stringCode);
- return isValidRef(cellRef);
+ String s = new String(buffer, start, length);
+ return this.addString(s);
}
- private final boolean isValidRef(final int ref) {
- return (ref >= FIRST_CELL_REF && ref < this.refHeapPos);
- }
-
- final boolean isJavaString(int stringCode) {
- final int cellRef = cas2refHeapPointer(stringCode);
- if (!isValidRef(cellRef)) {
- return false;
- }
- return (this.refHeap[cellRef + STRING_LIST_ADDR_OFFSET] != NULL);
- }
-
- final boolean isCharArray(int stringCode) {
- final int cellRef = cas2refHeapPointer(stringCode);
- if (!isValidRef(cellRef)) {
- return false;
- }
- return (this.refHeap[cellRef + CHAR_HEAP_POINTER_OFFSET] != NULL);
- }
final int getCharArrayLength(int stringCode) {
- final int cellRef = cas2refHeapPointer(stringCode);
- return this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET];
+ return this.stringList.get(stringCode).length();
}
final int getLeastStringCode() {
- return refHeap2casPointer(FIRST_CELL_REF);
+ return leastStringCode;
}
final int getLargestStringCode() {
- return refHeap2casPointer(this.refHeapPos) - 1;
+ return this.stringList.size() - 1;
}
}