You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2007/05/29 23:05:07 UTC
svn commit: r542651 - in
/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl:
JCasHashMap.java JCasImpl.java
Author: schor
Date: Tue May 29 14:05:06 2007
New Revision: 542651
URL: http://svn.apache.org/viewvc?view=rev&rev=542651
Log:
[UIMA-419] reduce space used for casAddr -> JCas objects, as follows: Use a hash table
with no keys (keys derived from value), with no "boxing" of int key values, and using
the identity-hash style of no hashmap.entry objects. This is a specialized map, not
implementing the full map interface, just for this purpose.
Added:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasHashMap.java
Modified:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasImpl.java
Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasHashMap.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasHashMap.java?view=auto&rev=542651
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasHashMap.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasHashMap.java Tue May 29 14:05:06 2007
@@ -0,0 +1,193 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.jcas.impl;
+
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.uima.cas.impl.FeatureStructureImpl;
+
+/**
+ * Special space-saving table that maps between CAS addr and
+ * JCas cover objects.
+ *
+ * Assumptions: Each addr has a corresponding JCas; it is not
+ * permitted to "update" an addr with a different JCas
+ * cover class (unless the table is cleared first).
+ *
+ * Table always a power of 2 in size - permits faster hashing
+ *
+ */
+public class JCasHashMap {
+
+ // set to true to collect statistics for tuning
+ // you have to also put a call to jcas.showJfsFromCaddrHistogram() at the end of the run
+ private static final boolean TUNE = false;
+
+ // This inner class is only here to get access to the "next(bits)" method of Random
+ private static class MyRandom extends Random {
+ protected int next(int bits) {
+ return super.next(bits);
+ }
+ private static final long serialVersionUID = 1L;
+ }
+
+ //These are for tuning
+ private int histogram [];
+ private int nbrProbes;
+ private int maxProbe = 0;
+
+ private int sizeWhichTriggersExpansion;
+
+ private float loadFactor = (float)0.50;
+
+ private int size; // number of elements in the table
+
+ private FeatureStructureImpl [] table;
+
+ private MyRandom random = new MyRandom();
+
+ // These are for hashing the CAS address
+ private int bits; // number of random bits to generate for a probe
+ private int bitsMask; // 1's to "and" with result to keep in range
+ private int casAddr;
+
+ JCasHashMap(int initialSize) {
+ // round initialSize to a power of 2
+ int n = initialSize;
+ int i = 0;
+ while (n != 0) {
+ i++;
+ n = n >> 1;
+ }
+ // n = 1, i = 1
+ // n = 2,3 i = 2
+ // n = 4,5,6,7 i = 3
+
+ if (1<< (i - 1) == initialSize) {
+ i = i - 1; // if initial size was a power of 2, correct for that
+ }
+ // n = 2 i = 1
+ // n = 3,4 i = 2
+ // n = 5,6,7,8 i = 3
+ bits = i;
+ bitsMask = (1<<i) - 1;
+ initialSize = 1<<i;
+ table = new FeatureStructureImpl[initialSize];
+ sizeWhichTriggersExpansion = (int)(initialSize * loadFactor);
+ size = 0;
+ if (TUNE) {
+ histogram = new int[30];
+ Arrays.fill(histogram, 0);
+ }
+ }
+
+ public void clear() {
+ Arrays.fill(table, null);
+ size = 0;
+ }
+
+ public FeatureStructureImpl get(int key) {
+ FeatureStructureImpl maybe = table[probe(key)];
+ while ((null != maybe) && (maybe.getAddress() != key)) {
+ maybe = table[nextProbe()];
+ }
+ if (TUNE) {
+ histogram[Math.min(histogram.length - 1, nbrProbes - 1)]++;
+ maxProbe = Math.max(maxProbe, nbrProbes);
+ }
+ return maybe;
+ }
+
+ public void put(FeatureStructureImpl value) {
+ final int key = value.getAddress();
+ int probeAddr = probe(key);
+ if (TUNE) {
+ if (key < 200) {
+ System.out.println("key = " + key + ", probe= " + probeAddr);
+ }
+ }
+ while (null != table[probeAddr]) {
+ probeAddr = nextProbe();
+ }
+ if (TUNE) {
+ histogram[Math.min(histogram.length - 1, nbrProbes - 1)]++;
+ maxProbe = Math.max(maxProbe, nbrProbes);
+ }
+ table[probeAddr] = value;
+ size++;
+ if (size >= sizeWhichTriggersExpansion) {
+ increaseSize();
+ }
+ }
+
+ public int size() {
+ return size;
+ }
+
+ private int probe(int addr) {
+ casAddr = addr;
+ random.setSeed(addr);
+ random.next(1); // once to randomize all bits
+ if (TUNE)
+ nbrProbes = 0;
+ return nextProbe();
+ }
+
+ private int nextProbe() {
+ if (TUNE) {
+ nbrProbes++;
+ }
+ // adding the casAddr insures the probe sequence is
+ // different for different addrs, even if by chance the
+ // random bit sequence is the same for two casAddrs.
+ return (random.next(bits) + casAddr) & bitsMask;
+ }
+
+ private void increaseSize() {
+ final FeatureStructureImpl [] oldTable = table;
+ final int oldCapacity = oldTable.length;
+
+ int newCapacity = 2 * oldCapacity;
+ bits += 1;
+ bitsMask = (1<<bits) - 1;
+
+ sizeWhichTriggersExpansion = (int)(newCapacity * loadFactor);
+ if (TUNE)
+ System.out.println("Size increasing from " + oldCapacity + " to " + newCapacity);
+ table = new FeatureStructureImpl [newCapacity];
+ size = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (oldTable[i] != null) {
+ put(oldTable[i]);
+ }
+ }
+ }
+
+ public void showHistogram() {
+
+ System.out.println("Histogram of number of probes, factor = " + loadFactor + ", max = " + maxProbe);
+ System.out.println("bytes / entry = " + (float)(table.length) * 4/size);
+ for (int i = 0; i < histogram.length; i++) {
+ System.out.println (i + ": " + histogram[i]);
+ }
+
+ }
+}
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasImpl.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasImpl.java?view=diff&rev=542651&r1=542650&r2=542651
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasImpl.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/jcas/impl/JCasImpl.java Tue May 29 14:05:06 2007
@@ -54,6 +54,7 @@
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.FeatureStructureImpl;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.impl.LowLevelException;
import org.apache.uima.cas.impl.LowLevelIndexRepository;
@@ -215,7 +216,7 @@
// *************************************************
// * Static Data shared with all instances of JCas *
// *************************************************
- private final static int INITIAL_HASHMAP_SIZE = 200;
+ private final static int INITIAL_HASHMAP_SIZE = 256;
// keys = class loader,
// values = Maps: keys = string = fully qualified java type names of _Type classes
@@ -258,7 +259,7 @@
*/
private int prevCaddr2JfsSize = INITIAL_HASHMAP_SIZE;
- private Map cAddr2Jfs = new HashMap(INITIAL_HASHMAP_SIZE);
+ private JCasHashMap cAddr2Jfs = new JCasHashMap(INITIAL_HASHMAP_SIZE);
/* convenience holders of CAS constants that may be useful */
/* initialization done lazily - on first call to getter */
@@ -832,7 +833,7 @@
* @see org.apache.uima.jcas.JCas#putJfsFromCaddr(int, org.apache.uima.cas.FeatureStructure)
*/
public void putJfsFromCaddr(int casAddr, FeatureStructure fs) {
- sharedView.cAddr2Jfs.put(new Integer(casAddr), fs);
+ sharedView.cAddr2Jfs.put((FeatureStructureImpl)fs);
}
/*
@@ -841,8 +842,12 @@
* @see org.apache.uima.jcas.JCas#getJfsFromCaddr(int)
*/
public TOP getJfsFromCaddr(int casAddr) {
- return (TOP) sharedView.cAddr2Jfs.get(new Integer(casAddr));
+ return (TOP) sharedView.cAddr2Jfs.get(casAddr);
}
+
+ public void showJfsFromCaddrHistogram() {
+ sharedView.cAddr2Jfs.showHistogram();
+ }
// * Implementation of part of the Cas interface as part of JCas*
@@ -862,7 +867,7 @@
// System.out.println("JCas Shrinking Hashtable from " +
// jcas.prevCaddr2JfsSize);
sv.prevCaddr2JfsSize = hashSize;
- sv.cAddr2Jfs = new HashMap(hashSize);
+ sv.cAddr2Jfs = new JCasHashMap(hashSize);
} else {
sv.prevCaddr2JfsSize = Math.max(hashSize, sv.prevCaddr2JfsSize);
// System.out.println("JCas clearing - keeping same size, new max prev