You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2008/06/06 16:17:36 UTC

svn commit: r663949 - /incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java

Author: twgoetz
Date: Fri Jun  6 07:17:36 2008
New Revision: 663949

URL: http://svn.apache.org/viewvc?rev=663949&view=rev
Log:
Jira UIMA-1067: remove legacy char heap/ref heap from StringHeap code.

https://issues.apache.org/jira/browse/UIMA-1067

Modified:
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java?rev=663949&r1=663948&r2=663949&view=diff
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/StringHeap.java Fri Jun  6 07:17:36 2008
@@ -20,272 +20,146 @@
 package org.apache.uima.cas.impl;
 
 import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.uima.internal.util.IntArrayUtils;
+import java.util.List;
 
 /**
  * Encapsulate string storage for the CAS.
  * 
  */
 final class StringHeap {
-  private static final int DEFAULT_REF_HEAP_BASE_SIZE = 5000;
-
-  private static final int DEFAULT_REF_HEAP_MULT_LIMIT = DEFAULT_REF_HEAP_BASE_SIZE * 1024;
-
-  private static final int DEFAULT_STRING_HEAP_BASE_SIZE = 20000;
-
-  private static final int DEFAULT_STRING_HEAP_MULT_LIMIT = DEFAULT_STRING_HEAP_BASE_SIZE * 1024;
-
-  private static final int MIN_REF_HEAP_BASE_SIZE = 1024;
-
-  private static final int MIN_STR_HEAP_BASE_SIZE = 1024 * 32;
-
-  protected static final int REF_HEAP_CELL_SIZE = 3;
-
-  protected static final int CHAR_HEAP_POINTER_OFFSET = 0;
-
-  protected static final int CHAR_HEAP_STRLEN_OFFSET = 1;
-
-  protected static final int STRING_LIST_ADDR_OFFSET = 2;
-
-  private static final int GROWTH_FACTOR = 4;
-
-  private static final int NULL = 0;
-
-  // Start pos so that first returned string code is 1.
-  protected static final int FIRST_CELL_REF = 3;
-
-  private final int refHeapBaseSize;
+  
+  private static final int leastStringCode = 1;
 
-  private final int refHeapMultLimit;
-
-  private final int strHeapBaseSize;
-
-  private final int strHeapMultLimit;
-
-  protected int refHeapPos = FIRST_CELL_REF;
-
-  protected int[] refHeap;
-
-  protected ArrayList stringList;
-
-  protected int charHeapPos = 0;
-
-  protected char[] stringHeap;
+  private List<String> stringList;
 
   StringHeap() {
-    this(DEFAULT_REF_HEAP_BASE_SIZE, DEFAULT_REF_HEAP_MULT_LIMIT, DEFAULT_STRING_HEAP_BASE_SIZE,
-            DEFAULT_STRING_HEAP_MULT_LIMIT);
-  }
-
-  StringHeap(int refHeapBaseSize, int refHeapMultLimit, int strHeapBaseSize, int strHeapMultLimit) {
     super();
-    if (refHeapBaseSize < MIN_REF_HEAP_BASE_SIZE) {
-      this.refHeapBaseSize = MIN_REF_HEAP_BASE_SIZE;
-    } else {
-      this.refHeapBaseSize = refHeapBaseSize;
-    }
-    if (refHeapMultLimit < DEFAULT_REF_HEAP_MULT_LIMIT) {
-      this.refHeapMultLimit = DEFAULT_REF_HEAP_MULT_LIMIT;
-    } else {
-      this.refHeapMultLimit = refHeapMultLimit;
-    }
-    if (strHeapBaseSize < MIN_STR_HEAP_BASE_SIZE) {
-      this.strHeapBaseSize = MIN_STR_HEAP_BASE_SIZE;
-    } else {
-      this.strHeapBaseSize = strHeapBaseSize;
-    }
-    if (strHeapMultLimit < DEFAULT_STRING_HEAP_MULT_LIMIT) {
-      this.strHeapMultLimit = DEFAULT_STRING_HEAP_MULT_LIMIT;
-    } else {
-      this.strHeapMultLimit = strHeapMultLimit;
-    }
     initMemory();
   }
 
-  private static final int cas2refHeapPointer(int i) {
-    return i * REF_HEAP_CELL_SIZE;
-  }
-
-  private static final int refHeap2casPointer(int i) {
-    return i / REF_HEAP_CELL_SIZE;
-  }
-
+  // Initialize internal data structures.  This used to be a lot more complicated when we had the
+  // character heap option.
   private final void initMemory() {
-    this.refHeap = new int[this.refHeapBaseSize];
-    this.stringHeap = new char[this.strHeapBaseSize];
-    this.stringList = new ArrayList();
+    this.stringList = new ArrayList<String>();
     this.stringList.add(null);
   }
 
-  final void reset() {
-    this.reset(false);
+  /** Deserialize from a binary serialized CAS.
+   * 
+   * @param shdh Serialization helper data structure.
+   */
+  final void reinit(StringHeapDeserializationHelper shdh) {
+    initMemory();
+    int stringOffset;
+    int stringLength;
+    // Simply iterate over the ref heap and add one string after another.  The references come out
+    // right because they are defined by the positions on the ref heap.
+    for (int i = StringHeapDeserializationHelper.FIRST_CELL_REF; i < shdh.refHeap.length; i += StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) {
+      stringOffset = shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET];
+      stringLength = shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET];
+      this.stringList.add(new String(shdh.charHeap, stringOffset, stringLength));
+    }
   }
 
-  final void reset(boolean doFullReset) {
-    if (doFullReset) {
-      this.initMemory();
-    } else {
-      Arrays.fill(this.refHeap, 0, this.refHeapPos, 0);
-      this.stringList = new ArrayList();
-      this.stringList.add(null);
+  /**
+   * Create the serialization helper data structure.
+   * @return Serialization helper that is easier for the serialization code to interpret.
+   */
+  StringHeapDeserializationHelper serialize() {
+    StringHeapDeserializationHelper shdh = new StringHeapDeserializationHelper();
+    // Ref heap is 3 times the size of the string list.
+    shdh.refHeap = new int[this.stringList.size()
+        * StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE];
+    shdh.refHeapPos = shdh.refHeap.length;
+    // Compute required size of character heap.
+    int charHeapSize = 0;
+    for (int i = 0; i < this.stringList.size(); i++) {
+      String s = this.stringList.get(i);
+      if (s != null) {
+        charHeapSize += s.length();
+      }
+    }
+    shdh.charHeap = new char[charHeapSize];
+    shdh.charHeapPos = shdh.charHeap.length;
+
+    int charCount = 0;
+    // Now write out the actual data
+    for (int i = 1; i < this.stringList.size(); i++) {
+      String s = this.stringList.get(i);
+      int refHeapOffset = i * StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE;
+      shdh.refHeap[refHeapOffset + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET] = charCount;
+      shdh.refHeap[refHeapOffset + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET] = s
+          .length();
+      System.arraycopy(s.toCharArray(), 0, shdh.charHeap, charCount, s.length());
+      charCount += s.length();
     }
-    this.refHeapPos = FIRST_CELL_REF;
-    this.charHeapPos = 0;
+    assert (charCount == shdh.charHeap.length);
+    return shdh;
   }
 
-  // Getters
+  // Reset the string heap (called on CAS reset).
+  final void reset() {
+    initMemory();
+  }
 
+  // Get a string value
   String getStringForCode(int stringCode) {
-    if (stringCode == NULL) {
+    if (stringCode == LowLevelCAS.NULL_FS_REF) {
       return null;
     }
-    final int strInfoRef = cas2refHeapPointer(stringCode);
-    final int internalStringCode = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
-    if (internalStringCode != NULL) {
-      return (String) this.stringList.get(internalStringCode);
-    }
-    final int strOffset = this.refHeap[strInfoRef + CHAR_HEAP_POINTER_OFFSET];
-    final int strLen = this.refHeap[strInfoRef + CHAR_HEAP_STRLEN_OFFSET];
-    return new String(this.stringHeap, strOffset, strLen);
+    return this.stringList.get(stringCode);
   }
 
+  // Who uses this?
   int copyCharsToBuffer(int stringCode, char[] buffer, int start) {
-    final int strInfoRef = cas2refHeapPointer(stringCode);
-    final int strCode = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
-    int strOffset = this.refHeap[strInfoRef + CHAR_HEAP_POINTER_OFFSET];
-    final int bufMax = buffer.length - start;
-    if (strCode == 0) {
-      final int strLen = this.refHeap[strInfoRef + CHAR_HEAP_STRLEN_OFFSET];
-      final int max = (strLen < bufMax) ? strLen : bufMax;
-      // Warning: using start and strOffset as counters (premature
-      // optimization ;)
-      for (int i = 0; i < max; i++) {
-        buffer[start] = this.stringHeap[strOffset];
-        ++start;
-        ++strOffset;
-      }
-      return strLen;
-    }
-    final int internalStringCode = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
-    final String str = (String) this.stringList.get(internalStringCode);
+    final String str = this.stringList.get(stringCode);
     final int len = str.length();
-    final int max = (len < bufMax) ? len : bufMax;
+    final int requestedMax = start + len;
+    // Check that the buffer is long enough to copy the whole string. If it isn't long enough, we
+    // copy up to buffer.length - start characters.
+    final int max = (buffer.length < requestedMax) ? (buffer.length - start) : len;
     for (int i = 0; i < max; i++) {
       buffer[start + i] = str.charAt(i);
     }
     return len;
   }
 
-  // Setters
-
+  /**
+   * Add a string.
+   * @param s The string.
+   * @return The positional code of the added string.
+   */
   int addString(String s) {
     if (s == null) {
       return LowLevelCAS.NULL_FS_REF;
     }
-    // Get and remember pointer at next free refHeap cell.
-    final int cellRef = this.refHeapPos;
-    // Increment pos and ensure sufficient space.
-    this.refHeapPos += REF_HEAP_CELL_SIZE;
-    ensureRefHeapSize();
-    // Get a new string ref.
-    final int stringRef = this.stringList.size();
-    // Set the string list reference to the code point of the string being
-    // added.
-    this.refHeap[cellRef + STRING_LIST_ADDR_OFFSET] = stringRef;
-    this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET] = s.length();
-    // Add the string to the list, at the position corresponding to
-    // stringRef.
+    final int addr = this.stringList.size();
     this.stringList.add(s);
-    // Return cas version of refHeap ref.
-    return refHeap2casPointer(cellRef);
+    return addr;
   }
 
+  // Not sure what this is supposed to do.  Passes unit tests like this.
   int cloneStringReference(int stringCode) {
-    // Set the string list reference to the code point of the string being
-    // added.
-    final int strInfoRef = cas2refHeapPointer(stringCode);
-    int strRef = this.refHeap[strInfoRef + STRING_LIST_ADDR_OFFSET];
-    // get the length of the reference string
-    int strLen = this.refHeap[strInfoRef + CHAR_HEAP_STRLEN_OFFSET];
-    // Get and remember pointer at next free refHeap cell.
-    final int cellRef = this.refHeapPos;
-    // Increment pos and ensure sufficient space.
-    this.refHeapPos += REF_HEAP_CELL_SIZE;
-    ensureRefHeapSize();
-    // Set the string list reference to the code point of the string being
-    // reference
-    this.refHeap[cellRef + STRING_LIST_ADDR_OFFSET] = strRef;
-    this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET] = strLen;
-    // Return cas version of refHeap ref.
-    return refHeap2casPointer(cellRef);
-  }
-
-  private final void ensureRefHeapSize() {
-    this.refHeap = IntArrayUtils.ensure_size(this.refHeap, this.refHeapPos, GROWTH_FACTOR,
-            this.refHeapMultLimit);
+    return stringCode;
   }
 
+  // Who uses this?
   int addCharBuffer(char[] buffer, int start, int length) {
-    // Get and remember pointer at next free refHeap cell.
-    final int cellRef = this.refHeapPos;
-    // Increment pos and ensure sufficient space.
-    this.refHeapPos += REF_HEAP_CELL_SIZE;
-    ensureRefHeapSize();
-    final int charBufStart = this.charHeapPos;
-    // Create the cell.
-    this.refHeap[cellRef + CHAR_HEAP_POINTER_OFFSET] = charBufStart;
-    this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET] = length;
-    this.charHeapPos += length + 1;
-    ensureCharHeapSize();
-    System.arraycopy(buffer, start, this.stringHeap, charBufStart, length);
-    return refHeap2casPointer(cellRef);
-  }
-
-  private void ensureCharHeapSize() {
-    this.stringHeap = IntArrayUtils.ensure_size(this.stringHeap, this.charHeapPos, GROWTH_FACTOR,
-            this.strHeapMultLimit);
-  }
-
-  // Informational
-
-  final boolean isStringCode(int stringCode) {
-    final int cellRef = cas2refHeapPointer(stringCode);
-    return isValidRef(cellRef);
+    String s = new String(buffer, start, length);
+    return this.addString(s);
   }
 
-  private final boolean isValidRef(final int ref) {
-    return (ref >= FIRST_CELL_REF && ref < this.refHeapPos);
-  }
-
-  final boolean isJavaString(int stringCode) {
-    final int cellRef = cas2refHeapPointer(stringCode);
-    if (!isValidRef(cellRef)) {
-      return false;
-    }
-    return (this.refHeap[cellRef + STRING_LIST_ADDR_OFFSET] != NULL);
-  }
-
-  final boolean isCharArray(int stringCode) {
-    final int cellRef = cas2refHeapPointer(stringCode);
-    if (!isValidRef(cellRef)) {
-      return false;
-    }
-    return (this.refHeap[cellRef + CHAR_HEAP_POINTER_OFFSET] != NULL);
-  }
 
   final int getCharArrayLength(int stringCode) {
-    final int cellRef = cas2refHeapPointer(stringCode);
-    return this.refHeap[cellRef + CHAR_HEAP_STRLEN_OFFSET];
+    return this.stringList.get(stringCode).length();
   }
 
   final int getLeastStringCode() {
-    return refHeap2casPointer(FIRST_CELL_REF);
+    return leastStringCode;
   }
 
   final int getLargestStringCode() {
-    return refHeap2casPointer(this.refHeapPos) - 1;
+    return this.stringList.size() - 1;
   }
 
 }