You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hivemall.apache.org by my...@apache.org on 2017/04/09 21:32:17 UTC
[05/12] incubator-hivemall git commit: Close #51: [HIVEMALL-75]
Support Sparse Vector Format as the input of RandomForest
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java
new file mode 100644
index 0000000..f847b15
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/Int2FloatOpenHashTable.java
@@ -0,0 +1,418 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import hivemall.utils.math.Primes;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+
+/**
+ * An open-addressing hash table with double hashing
+ *
+ * @see http://en.wikipedia.org/wiki/Double_hashing
+ */
+public class Int2FloatOpenHashTable implements Externalizable {
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+ private static final float DEFAULT_LOAD_FACTOR = 0.7f;
+ private static final float DEFAULT_GROW_FACTOR = 2.0f;
+
+ protected final transient float _loadFactor;
+ protected final transient float _growFactor;
+
+ protected int _used = 0;
+ protected int _threshold;
+ protected float defaultReturnValue = -1.f;
+
+ protected int[] _keys;
+ protected float[] _values;
+ protected byte[] _states;
+
+ protected Int2FloatOpenHashTable(int size, float loadFactor, float growFactor,
+ boolean forcePrime) {
+ if (size < 1) {
+ throw new IllegalArgumentException();
+ }
+ this._loadFactor = loadFactor;
+ this._growFactor = growFactor;
+ int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size;
+ this._keys = new int[actualSize];
+ this._values = new float[actualSize];
+ this._states = new byte[actualSize];
+ this._threshold = (int) (actualSize * _loadFactor);
+ }
+
+ public Int2FloatOpenHashTable(int size, float loadFactor, float growFactor) {
+ this(size, loadFactor, growFactor, true);
+ }
+
+ public Int2FloatOpenHashTable(int size) {
+ this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true);
+ }
+
+ /**
+ * Only for {@link Externalizable}
+ */
+ public Int2FloatOpenHashTable() {// required for serialization
+ this._loadFactor = DEFAULT_LOAD_FACTOR;
+ this._growFactor = DEFAULT_GROW_FACTOR;
+ }
+
+ public void defaultReturnValue(float v) {
+ this.defaultReturnValue = v;
+ }
+
+ public boolean containsKey(int key) {
+ return findKey(key) >= 0;
+ }
+
+ /**
+ * @return -1.f if not found
+ */
+ public float get(int key) {
+ int i = findKey(key);
+ if (i < 0) {
+ return defaultReturnValue;
+ }
+ return _values[i];
+ }
+
+ public float put(int key, float value) {
+ int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ int[] keys = _keys;
+ float[] values = _values;
+ byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {// double hashing
+ if (keys[keyIdx] == key) {
+ float old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ float old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ ++_used;
+ return defaultReturnValue;
+ }
+
+ /** Return weather the required slot is free for new entry */
+ protected boolean isFree(int index, int key) {
+ byte stat = _states[index];
+ if (stat == FREE) {
+ return true;
+ }
+ if (stat == REMOVED && _keys[index] == key) {
+ return true;
+ }
+ return false;
+ }
+
+ /** @return expanded or not */
+ protected boolean preAddEntry(int index) {
+ if ((_used + 1) >= _threshold) {// too filled
+ int newCapacity = Math.round(_keys.length * _growFactor);
+ ensureCapacity(newCapacity);
+ return true;
+ }
+ return false;
+ }
+
+ protected int findKey(int key) {
+ int[] keys = _keys;
+ byte[] states = _states;
+ int keyLength = keys.length;
+
+ int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ return -1;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ }
+ }
+ return -1;
+ }
+
+ public float remove(int key) {
+ int[] keys = _keys;
+ float[] values = _values;
+ byte[] states = _states;
+ int keyLength = keys.length;
+
+ int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ float old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ // second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (states[keyIdx] == FREE) {
+ return defaultReturnValue;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ float old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ }
+ }
+ return defaultReturnValue;
+ }
+
+ public int size() {
+ return _used;
+ }
+
+ public void clear() {
+ Arrays.fill(_states, FREE);
+ this._used = 0;
+ }
+
+ public IMapIterator entries() {
+ return new MapIterator();
+ }
+
+ @Override
+ public String toString() {
+ int len = size() * 10 + 2;
+ StringBuilder buf = new StringBuilder(len);
+ buf.append('{');
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ buf.append(i.getKey());
+ buf.append('=');
+ buf.append(i.getValue());
+ if (i.hasNext()) {
+ buf.append(',');
+ }
+ }
+ buf.append('}');
+ return buf.toString();
+ }
+
+ protected void ensureCapacity(int newCapacity) {
+ int prime = Primes.findLeastPrimeNumber(newCapacity);
+ rehash(prime);
+ this._threshold = Math.round(prime * _loadFactor);
+ }
+
+ private void rehash(int newCapacity) {
+ int oldCapacity = _keys.length;
+ if (newCapacity <= oldCapacity) {
+ throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity);
+ }
+ int[] newkeys = new int[newCapacity];
+ float[] newValues = new float[newCapacity];
+ byte[] newStates = new byte[newCapacity];
+ int used = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (_states[i] == FULL) {
+ used++;
+ int k = _keys[i];
+ float v = _values[i];
+ int hash = keyHash(k);
+ int keyIdx = hash % newCapacity;
+ if (newStates[keyIdx] == FULL) {// second hashing
+ int decr = 1 + (hash % (newCapacity - 2));
+ while (newStates[keyIdx] != FREE) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += newCapacity;
+ }
+ }
+ }
+ newkeys[keyIdx] = k;
+ newValues[keyIdx] = v;
+ newStates[keyIdx] = FULL;
+ }
+ }
+ this._keys = newkeys;
+ this._values = newValues;
+ this._states = newStates;
+ this._used = used;
+ }
+
+ private static int keyHash(int key) {
+ return key & 0x7fffffff;
+ }
+
+ public void writeExternal(ObjectOutput out) throws IOException {
+ out.writeInt(_threshold);
+ out.writeInt(_used);
+
+ out.writeInt(_keys.length);
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ out.writeInt(i.getKey());
+ out.writeFloat(i.getValue());
+ }
+ }
+
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ this._threshold = in.readInt();
+ this._used = in.readInt();
+
+ int keylen = in.readInt();
+ int[] keys = new int[keylen];
+ float[] values = new float[keylen];
+ byte[] states = new byte[keylen];
+ for (int i = 0; i < _used; i++) {
+ int k = in.readInt();
+ float v = in.readFloat();
+ int hash = keyHash(k);
+ int keyIdx = hash % keylen;
+ if (states[keyIdx] != FREE) {// second hash
+ int decr = 1 + (hash % (keylen - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keylen;
+ }
+ if (states[keyIdx] == FREE) {
+ break;
+ }
+ }
+ }
+ states[keyIdx] = FULL;
+ keys[keyIdx] = k;
+ values[keyIdx] = v;
+ }
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ }
+
+ public interface IMapIterator {
+
+ public boolean hasNext();
+
+ /**
+ * @return -1 if not found
+ */
+ public int next();
+
+ public int getKey();
+
+ public float getValue();
+
+ }
+
+ private final class MapIterator implements IMapIterator {
+
+ int nextEntry;
+ int lastEntry = -1;
+
+ MapIterator() {
+ this.nextEntry = nextEntry(0);
+ }
+
+ /** find the index of next full entry */
+ int nextEntry(int index) {
+ while (index < _keys.length && _states[index] != FULL) {
+ index++;
+ }
+ return index;
+ }
+
+ public boolean hasNext() {
+ return nextEntry < _keys.length;
+ }
+
+ public int next() {
+ if (!hasNext()) {
+ return -1;
+ }
+ int curEntry = nextEntry;
+ this.lastEntry = curEntry;
+ this.nextEntry = nextEntry(curEntry + 1);
+ return curEntry;
+ }
+
+ public int getKey() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _keys[lastEntry];
+ }
+
+ public float getValue() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _values[lastEntry];
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java
new file mode 100644
index 0000000..5e9e812
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/Int2IntOpenHashTable.java
@@ -0,0 +1,414 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import hivemall.utils.math.Primes;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+
+/**
+ * An open-addressing hash table with double hashing
+ *
+ * @see http://en.wikipedia.org/wiki/Double_hashing
+ */
+public final class Int2IntOpenHashTable implements Externalizable {
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+ private static final float DEFAULT_LOAD_FACTOR = 0.7f;
+ private static final float DEFAULT_GROW_FACTOR = 2.0f;
+
+ protected final transient float _loadFactor;
+ protected final transient float _growFactor;
+
+ protected int _used = 0;
+ protected int _threshold;
+ protected int defaultReturnValue = -1;
+
+ protected int[] _keys;
+ protected int[] _values;
+ protected byte[] _states;
+
+ protected Int2IntOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) {
+ if (size < 1) {
+ throw new IllegalArgumentException();
+ }
+ this._loadFactor = loadFactor;
+ this._growFactor = growFactor;
+ int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size;
+ this._keys = new int[actualSize];
+ this._values = new int[actualSize];
+ this._states = new byte[actualSize];
+ this._threshold = (int) (actualSize * _loadFactor);
+ }
+
+ public Int2IntOpenHashTable(int size, int loadFactor, int growFactor) {
+ this(size, loadFactor, growFactor, true);
+ }
+
+ public Int2IntOpenHashTable(int size) {
+ this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true);
+ }
+
+ public Int2IntOpenHashTable() {// required for serialization
+ this._loadFactor = DEFAULT_LOAD_FACTOR;
+ this._growFactor = DEFAULT_GROW_FACTOR;
+ }
+
+ public void defaultReturnValue(int v) {
+ this.defaultReturnValue = v;
+ }
+
+ public boolean containsKey(final int key) {
+ return findKey(key) >= 0;
+ }
+
+ /**
+ * @return -1.f if not found
+ */
+ public int get(final int key) {
+ final int i = findKey(key);
+ if (i < 0) {
+ return defaultReturnValue;
+ }
+ return _values[i];
+ }
+
+ public int put(final int key, final int value) {
+ final int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ final int[] keys = _keys;
+ final int[] values = _values;
+ final byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {// double hashing
+ if (keys[keyIdx] == key) {
+ int old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ int old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ ++_used;
+ return defaultReturnValue;
+ }
+
+ /** Return weather the required slot is free for new entry */
+ protected boolean isFree(final int index, final int key) {
+ final byte stat = _states[index];
+ if (stat == FREE) {
+ return true;
+ }
+ if (stat == REMOVED && _keys[index] == key) {
+ return true;
+ }
+ return false;
+ }
+
+ /** @return expanded or not */
+ protected boolean preAddEntry(final int index) {
+ if ((_used + 1) >= _threshold) {// too filled
+ int newCapacity = Math.round(_keys.length * _growFactor);
+ ensureCapacity(newCapacity);
+ return true;
+ }
+ return false;
+ }
+
+ protected int findKey(final int key) {
+ final int[] keys = _keys;
+ final byte[] states = _states;
+ final int keyLength = keys.length;
+
+ final int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ return -1;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ }
+ }
+ return -1;
+ }
+
+ public int remove(final int key) {
+ final int[] keys = _keys;
+ final int[] values = _values;
+ final byte[] states = _states;
+ final int keyLength = keys.length;
+
+ final int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ int old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ // second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (states[keyIdx] == FREE) {
+ return defaultReturnValue;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ int old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ }
+ }
+ return defaultReturnValue;
+ }
+
+ public int size() {
+ return _used;
+ }
+
+ public void clear() {
+ Arrays.fill(_states, FREE);
+ this._used = 0;
+ }
+
+ public IMapIterator entries() {
+ return new MapIterator();
+ }
+
+ @Override
+ public String toString() {
+ int len = size() * 10 + 2;
+ StringBuilder buf = new StringBuilder(len);
+ buf.append('{');
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ buf.append(i.getKey());
+ buf.append('=');
+ buf.append(i.getValue());
+ if (i.hasNext()) {
+ buf.append(',');
+ }
+ }
+ buf.append('}');
+ return buf.toString();
+ }
+
+ protected void ensureCapacity(final int newCapacity) {
+ int prime = Primes.findLeastPrimeNumber(newCapacity);
+ rehash(prime);
+ this._threshold = Math.round(prime * _loadFactor);
+ }
+
+ private void rehash(final int newCapacity) {
+ int oldCapacity = _keys.length;
+ if (newCapacity <= oldCapacity) {
+ throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity);
+ }
+ final int[] newkeys = new int[newCapacity];
+ final int[] newValues = new int[newCapacity];
+ final byte[] newStates = new byte[newCapacity];
+ int used = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (_states[i] == FULL) {
+ used++;
+ int k = _keys[i];
+ int v = _values[i];
+ int hash = keyHash(k);
+ int keyIdx = hash % newCapacity;
+ if (newStates[keyIdx] == FULL) {// second hashing
+ int decr = 1 + (hash % (newCapacity - 2));
+ while (newStates[keyIdx] != FREE) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += newCapacity;
+ }
+ }
+ }
+ newkeys[keyIdx] = k;
+ newValues[keyIdx] = v;
+ newStates[keyIdx] = FULL;
+ }
+ }
+ this._keys = newkeys;
+ this._values = newValues;
+ this._states = newStates;
+ this._used = used;
+ }
+
+ private static int keyHash(final int key) {
+ return key & 0x7fffffff;
+ }
+
+ public void writeExternal(ObjectOutput out) throws IOException {
+ out.writeInt(_threshold);
+ out.writeInt(_used);
+
+ out.writeInt(_keys.length);
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ out.writeInt(i.getKey());
+ out.writeInt(i.getValue());
+ }
+ }
+
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ this._threshold = in.readInt();
+ this._used = in.readInt();
+
+ final int keylen = in.readInt();
+ final int[] keys = new int[keylen];
+ final int[] values = new int[keylen];
+ final byte[] states = new byte[keylen];
+ for (int i = 0; i < _used; i++) {
+ int k = in.readInt();
+ int v = in.readInt();
+ int hash = keyHash(k);
+ int keyIdx = hash % keylen;
+ if (states[keyIdx] != FREE) {// second hash
+ int decr = 1 + (hash % (keylen - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keylen;
+ }
+ if (states[keyIdx] == FREE) {
+ break;
+ }
+ }
+ }
+ states[keyIdx] = FULL;
+ keys[keyIdx] = k;
+ values[keyIdx] = v;
+ }
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ }
+
+ public interface IMapIterator {
+
+ public boolean hasNext();
+
+ /**
+ * @return -1 if not found
+ */
+ public int next();
+
+ public int getKey();
+
+ public int getValue();
+
+ }
+
+ private final class MapIterator implements IMapIterator {
+
+ int nextEntry;
+ int lastEntry = -1;
+
+ MapIterator() {
+ this.nextEntry = nextEntry(0);
+ }
+
+ /** find the index of next full entry */
+ int nextEntry(int index) {
+ while (index < _keys.length && _states[index] != FULL) {
+ index++;
+ }
+ return index;
+ }
+
+ public boolean hasNext() {
+ return nextEntry < _keys.length;
+ }
+
+ public int next() {
+ if (!hasNext()) {
+ return -1;
+ }
+ int curEntry = nextEntry;
+ this.lastEntry = curEntry;
+ this.nextEntry = nextEntry(curEntry + 1);
+ return curEntry;
+ }
+
+ public int getKey() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _keys[lastEntry];
+ }
+
+ public int getValue() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _values[lastEntry];
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java
new file mode 100644
index 0000000..68eb42f
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/Int2LongOpenHashTable.java
@@ -0,0 +1,500 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import hivemall.utils.codec.VariableByteCodec;
+import hivemall.utils.codec.ZigZagLEB128Codec;
+import hivemall.utils.math.Primes;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+
+import javax.annotation.Nonnull;
+
+/**
+ * An open-addressing hash table with double hashing
+ *
+ * @see http://en.wikipedia.org/wiki/Double_hashing
+ */
+public class Int2LongOpenHashTable implements Externalizable {
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+ public static final int DEFAULT_SIZE = 65536;
+ public static final float DEFAULT_LOAD_FACTOR = 0.7f;
+ public static final float DEFAULT_GROW_FACTOR = 2.0f;
+
+ protected final transient float _loadFactor;
+ protected final transient float _growFactor;
+
+ protected int[] _keys;
+ protected long[] _values;
+ protected byte[] _states;
+
+ protected int _used;
+ protected int _threshold;
+ protected long defaultReturnValue = -1L;
+
+ /**
+ * Constructor for Externalizable. Should not be called otherwise.
+ */
+ public Int2LongOpenHashTable() {// for Externalizable
+ this._loadFactor = DEFAULT_LOAD_FACTOR;
+ this._growFactor = DEFAULT_GROW_FACTOR;
+ }
+
+ public Int2LongOpenHashTable(int size) {
+ this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true);
+ }
+
+ public Int2LongOpenHashTable(int size, float loadFactor, float growFactor) {
+ this(size, loadFactor, growFactor, true);
+ }
+
+ protected Int2LongOpenHashTable(int size, float loadFactor, float growFactor, boolean forcePrime) {
+ if (size < 1) {
+ throw new IllegalArgumentException();
+ }
+ this._loadFactor = loadFactor;
+ this._growFactor = growFactor;
+ int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size;
+ this._keys = new int[actualSize];
+ this._values = new long[actualSize];
+ this._states = new byte[actualSize];
+ this._used = 0;
+ this._threshold = (int) (actualSize * _loadFactor);
+ }
+
+ public Int2LongOpenHashTable(@Nonnull int[] keys, @Nonnull long[] values,
+ @Nonnull byte[] states, int used) {
+ this._loadFactor = DEFAULT_LOAD_FACTOR;
+ this._growFactor = DEFAULT_GROW_FACTOR;
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ this._used = used;
+ this._threshold = keys.length;
+ }
+
+ @Nonnull
+ public static Int2LongOpenHashTable newInstance() {
+ return new Int2LongOpenHashTable(DEFAULT_SIZE);
+ }
+
+ public void defaultReturnValue(long v) {
+ this.defaultReturnValue = v;
+ }
+
+ @Nonnull
+ public int[] getKeys() {
+ return _keys;
+ }
+
+ @Nonnull
+ public long[] getValues() {
+ return _values;
+ }
+
+ @Nonnull
+ public byte[] getStates() {
+ return _states;
+ }
+
+ public boolean containsKey(int key) {
+ return findKey(key) >= 0;
+ }
+
+ /**
+ * @return -1.f if not found
+ */
+ public long get(int key) {
+ int i = findKey(key);
+ if (i < 0) {
+ return defaultReturnValue;
+ }
+ return _values[i];
+ }
+
+ public long put(int key, long value) {
+ int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ int[] keys = _keys;
+ long[] values = _values;
+ byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {// double hashing
+ if (keys[keyIdx] == key) {
+ long old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ long old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ ++_used;
+ return defaultReturnValue;
+ }
+
+ /** Return weather the required slot is free for new entry */
+ protected boolean isFree(int index, int key) {
+ byte stat = _states[index];
+ if (stat == FREE) {
+ return true;
+ }
+ if (stat == REMOVED && _keys[index] == key) {
+ return true;
+ }
+ return false;
+ }
+
+ /** @return expanded or not */
+ protected boolean preAddEntry(int index) {
+ if ((_used + 1) >= _threshold) {// too filled
+ int newCapacity = Math.round(_keys.length * _growFactor);
+ ensureCapacity(newCapacity);
+ return true;
+ }
+ return false;
+ }
+
+ protected int findKey(int key) {
+ int[] keys = _keys;
+ byte[] states = _states;
+ int keyLength = keys.length;
+
+ int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ return -1;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ }
+ }
+ return -1;
+ }
+
+ public long remove(int key) {
+ int[] keys = _keys;
+ long[] values = _values;
+ byte[] states = _states;
+ int keyLength = keys.length;
+
+ int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ long old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ // second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (states[keyIdx] == FREE) {
+ return defaultReturnValue;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ long old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ }
+ }
+ return defaultReturnValue;
+ }
+
+ public int size() {
+ return _used;
+ }
+
+ public int capacity() {
+ return _keys.length;
+ }
+
+ public void clear() {
+ Arrays.fill(_states, FREE);
+ this._used = 0;
+ }
+
+ public IMapIterator entries() {
+ return new MapIterator();
+ }
+
+ @Override
+ public String toString() {
+ int len = size() * 10 + 2;
+ StringBuilder buf = new StringBuilder(len);
+ buf.append('{');
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ buf.append(i.getKey());
+ buf.append('=');
+ buf.append(i.getValue());
+ if (i.hasNext()) {
+ buf.append(',');
+ }
+ }
+ buf.append('}');
+ return buf.toString();
+ }
+
+ protected void ensureCapacity(int newCapacity) {
+ int prime = Primes.findLeastPrimeNumber(newCapacity);
+ rehash(prime);
+ this._threshold = Math.round(prime * _loadFactor);
+ }
+
+ private void rehash(int newCapacity) {
+ int oldCapacity = _keys.length;
+ if (newCapacity <= oldCapacity) {
+ throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity);
+ }
+ int[] newkeys = new int[newCapacity];
+ long[] newValues = new long[newCapacity];
+ byte[] newStates = new byte[newCapacity];
+ int used = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (_states[i] == FULL) {
+ used++;
+ int k = _keys[i];
+ long v = _values[i];
+ int hash = keyHash(k);
+ int keyIdx = hash % newCapacity;
+ if (newStates[keyIdx] == FULL) {// second hashing
+ int decr = 1 + (hash % (newCapacity - 2));
+ while (newStates[keyIdx] != FREE) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += newCapacity;
+ }
+ }
+ }
+ newkeys[keyIdx] = k;
+ newValues[keyIdx] = v;
+ newStates[keyIdx] = FULL;
+ }
+ }
+ this._keys = newkeys;
+ this._values = newValues;
+ this._states = newStates;
+ this._used = used;
+ }
+
+ private static int keyHash(int key) {
+ return key & 0x7fffffff;
+ }
+
+ @Override
+ public void writeExternal(ObjectOutput out) throws IOException {
+ out.writeInt(_threshold);
+ out.writeInt(_used);
+
+ final int[] keys = _keys;
+ final int size = keys.length;
+ out.writeInt(size);
+
+ final byte[] states = _states;
+ writeStates(states, out);
+
+ final long[] values = _values;
+ for (int i = 0; i < size; i++) {
+ if (states[i] != FULL) {
+ continue;
+ }
+ ZigZagLEB128Codec.writeSignedInt(keys[i], out);
+ ZigZagLEB128Codec.writeSignedLong(values[i], out);
+ }
+ }
+
+ @Nonnull
+ private static void writeStates(@Nonnull final byte[] status, @Nonnull final DataOutput out)
+ throws IOException {
+ // write empty states's indexes differentially
+ final int size = status.length;
+ int cardinarity = 0;
+ for (int i = 0; i < size; i++) {
+ if (status[i] != FULL) {
+ cardinarity++;
+ }
+ }
+ out.writeInt(cardinarity);
+ if (cardinarity == 0) {
+ return;
+ }
+ int prev = 0;
+ for (int i = 0; i < size; i++) {
+ if (status[i] != FULL) {
+ int diff = i - prev;
+ assert (diff >= 0);
+ VariableByteCodec.encodeUnsignedInt(diff, out);
+ prev = i;
+ }
+ }
+ }
+
+ @Override
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ this._threshold = in.readInt();
+ this._used = in.readInt();
+
+ final int size = in.readInt();
+ final int[] keys = new int[size];
+ final long[] values = new long[size];
+ final byte[] states = new byte[size];
+ readStates(in, states);
+
+ for (int i = 0; i < size; i++) {
+ if (states[i] != FULL) {
+ continue;
+ }
+ keys[i] = ZigZagLEB128Codec.readSignedInt(in);
+ values[i] = ZigZagLEB128Codec.readSignedLong(in);
+ }
+
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ }
+
+ @Nonnull
+ private static void readStates(@Nonnull final DataInput in, @Nonnull final byte[] status)
+ throws IOException {
+ // read non-empty states differentially
+ final int cardinarity = in.readInt();
+ Arrays.fill(status, IntOpenHashTable.FULL);
+ int prev = 0;
+ for (int j = 0; j < cardinarity; j++) {
+ int i = VariableByteCodec.decodeUnsignedInt(in) + prev;
+ status[i] = IntOpenHashTable.FREE;
+ prev = i;
+ }
+ }
+
+ public interface IMapIterator {
+
+ public boolean hasNext();
+
+ /**
+ * @return -1 if not found
+ */
+ public int next();
+
+ public int getKey();
+
+ public long getValue();
+
+ }
+
+ private final class MapIterator implements IMapIterator {
+
+ int nextEntry;
+ int lastEntry = -1;
+
+ MapIterator() {
+ this.nextEntry = nextEntry(0);
+ }
+
+ /** find the index of next full entry */
+ int nextEntry(int index) {
+ while (index < _keys.length && _states[index] != FULL) {
+ index++;
+ }
+ return index;
+ }
+
+ public boolean hasNext() {
+ return nextEntry < _keys.length;
+ }
+
+ public int next() {
+ if (!hasNext()) {
+ return -1;
+ }
+ int curEntry = nextEntry;
+ this.lastEntry = curEntry;
+ this.nextEntry = nextEntry(curEntry + 1);
+ return curEntry;
+ }
+
+ public int getKey() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _keys[lastEntry];
+ }
+
+ public long getValue() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _values[lastEntry];
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java
new file mode 100644
index 0000000..d7ae8d6
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashMap.java
@@ -0,0 +1,467 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import hivemall.utils.math.Primes;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+
+/**
+ * An open-addressing hash table with double hashing
+ *
+ * @see http://en.wikipedia.org/wiki/Double_hashing
+ */
+public class IntOpenHashMap<V> implements Externalizable {
+ private static final long serialVersionUID = -8162355845665353513L;
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+ private static final float DEFAULT_LOAD_FACTOR = 0.7f;
+ private static final float DEFAULT_GROW_FACTOR = 2.0f;
+
+ protected final transient float _loadFactor;
+ protected final transient float _growFactor;
+
+ protected int _used = 0;
+ protected int _threshold;
+
+ protected int[] _keys;
+ protected V[] _values;
+ protected byte[] _states;
+
+ @SuppressWarnings("unchecked")
+ protected IntOpenHashMap(int size, float loadFactor, float growFactor, boolean forcePrime) {
+ if (size < 1) {
+ throw new IllegalArgumentException();
+ }
+ this._loadFactor = loadFactor;
+ this._growFactor = growFactor;
+ int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size;
+ this._keys = new int[actualSize];
+ this._values = (V[]) new Object[actualSize];
+ this._states = new byte[actualSize];
+ this._threshold = Math.round(actualSize * _loadFactor);
+ }
+
+ public IntOpenHashMap(int size, float loadFactor, float growFactor) {
+ this(size, loadFactor, growFactor, true);
+ }
+
+ public IntOpenHashMap(int size) {
+ this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true);
+ }
+
+ public IntOpenHashMap() {// required for serialization
+ this._loadFactor = DEFAULT_LOAD_FACTOR;
+ this._growFactor = DEFAULT_GROW_FACTOR;
+ }
+
+ public boolean containsKey(int key) {
+ return findKey(key) >= 0;
+ }
+
+ public final V get(final int key) {
+ final int i = findKey(key);
+ if (i < 0) {
+ return null;
+ }
+ recordAccess(i);
+ return _values[i];
+ }
+
+ public V put(final int key, final V value) {
+ final int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ final boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ final int[] keys = _keys;
+ final V[] values = _values;
+ final byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {// double hashing
+ if (keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ values[keyIdx] = value;
+ recordAccess(keyIdx);
+ return old;
+ }
+ // try second hash
+ final int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ values[keyIdx] = value;
+ recordAccess(keyIdx);
+ return old;
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ ++_used;
+ postAddEntry(keyIdx);
+ return null;
+ }
+
+ public V putIfAbsent(final int key, final V value) {
+ final int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ final boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ final int[] keys = _keys;
+ final V[] values = _values;
+ final byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {// second hashing
+ if (keys[keyIdx] == key) {
+ return values[keyIdx];
+ }
+ // try second hash
+ final int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return values[keyIdx];
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ _used++;
+ postAddEntry(keyIdx);
+ return null;
+ }
+
+ /** Return weather the required slot is free for new entry */
+ protected boolean isFree(int index, int key) {
+ byte stat = _states[index];
+ if (stat == FREE) {
+ return true;
+ }
+ if (stat == REMOVED && _keys[index] == key) {
+ return true;
+ }
+ return false;
+ }
+
+ /** @return expanded or not */
+ protected boolean preAddEntry(int index) {
+ if ((_used + 1) >= _threshold) {// too filled
+ int newCapacity = Math.round(_keys.length * _growFactor);
+ ensureCapacity(newCapacity);
+ return true;
+ }
+ return false;
+ }
+
+ protected void postAddEntry(int index) {}
+
+ private int findKey(int key) {
+ int[] keys = _keys;
+ byte[] states = _states;
+ int keyLength = keys.length;
+
+ int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ return -1;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ }
+ }
+ return -1;
+ }
+
+ public V remove(int key) {
+ int[] keys = _keys;
+ V[] values = _values;
+ byte[] states = _states;
+ int keyLength = keys.length;
+
+ int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ recordRemoval(keyIdx);
+ return old;
+ }
+ // second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (states[keyIdx] == FREE) {
+ return null;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ recordRemoval(keyIdx);
+ return old;
+ }
+ }
+ }
+ return null;
+ }
+
+ public int size() {
+ return _used;
+ }
+
+ public void clear() {
+ Arrays.fill(_states, FREE);
+ this._used = 0;
+ }
+
+ @SuppressWarnings("unchecked")
+ public IMapIterator<V> entries() {
+ return new MapIterator();
+ }
+
+ @Override
+ public String toString() {
+ int len = size() * 10 + 2;
+ StringBuilder buf = new StringBuilder(len);
+ buf.append('{');
+ IMapIterator<V> i = entries();
+ while (i.next() != -1) {
+ buf.append(i.getKey());
+ buf.append('=');
+ buf.append(i.getValue());
+ if (i.hasNext()) {
+ buf.append(',');
+ }
+ }
+ buf.append('}');
+ return buf.toString();
+ }
+
+ private void ensureCapacity(int newCapacity) {
+ int prime = Primes.findLeastPrimeNumber(newCapacity);
+ rehash(prime);
+ this._threshold = Math.round(prime * _loadFactor);
+ }
+
+ @SuppressWarnings("unchecked")
+ protected void rehash(int newCapacity) {
+ int oldCapacity = _keys.length;
+ if (newCapacity <= oldCapacity) {
+ throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity);
+ }
+ final int[] oldKeys = _keys;
+ final V[] oldValues = _values;
+ final byte[] oldStates = _states;
+ int[] newkeys = new int[newCapacity];
+ V[] newValues = (V[]) new Object[newCapacity];
+ byte[] newStates = new byte[newCapacity];
+ int used = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (oldStates[i] == FULL) {
+ used++;
+ int k = oldKeys[i];
+ V v = oldValues[i];
+ int hash = keyHash(k);
+ int keyIdx = hash % newCapacity;
+ if (newStates[keyIdx] == FULL) {// second hashing
+ int decr = 1 + (hash % (newCapacity - 2));
+ while (newStates[keyIdx] != FREE) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += newCapacity;
+ }
+ }
+ }
+ newkeys[keyIdx] = k;
+ newValues[keyIdx] = v;
+ newStates[keyIdx] = FULL;
+ }
+ }
+ this._keys = newkeys;
+ this._values = newValues;
+ this._states = newStates;
+ this._used = used;
+ }
+
+ private static int keyHash(int key) {
+ return key & 0x7fffffff;
+ }
+
+ protected void recordAccess(int idx) {};
+
+ protected void recordRemoval(int idx) {};
+
+ public void writeExternal(ObjectOutput out) throws IOException {
+ out.writeInt(_threshold);
+ out.writeInt(_used);
+
+ out.writeInt(_keys.length);
+ IMapIterator<V> i = entries();
+ while (i.next() != -1) {
+ out.writeInt(i.getKey());
+ out.writeObject(i.getValue());
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ this._threshold = in.readInt();
+ this._used = in.readInt();
+
+ int keylen = in.readInt();
+ int[] keys = new int[keylen];
+ V[] values = (V[]) new Object[keylen];
+ byte[] states = new byte[keylen];
+ for (int i = 0; i < _used; i++) {
+ int k = in.readInt();
+ V v = (V) in.readObject();
+ int hash = keyHash(k);
+ int keyIdx = hash % keylen;
+ if (states[keyIdx] != FREE) {// second hash
+ int decr = 1 + (hash % (keylen - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keylen;
+ }
+ if (states[keyIdx] == FREE) {
+ break;
+ }
+ }
+ }
+ states[keyIdx] = FULL;
+ keys[keyIdx] = k;
+ values[keyIdx] = v;
+ }
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ }
+
+ public interface IMapIterator<V> {
+
+ public boolean hasNext();
+
+ public int next();
+
+ public int getKey();
+
+ public V getValue();
+
+ }
+
+ @SuppressWarnings("rawtypes")
+ private final class MapIterator implements IMapIterator {
+
+ int nextEntry;
+ int lastEntry = -1;
+
+ MapIterator() {
+ this.nextEntry = nextEntry(0);
+ }
+
+ /** find the index of next full entry */
+ int nextEntry(int index) {
+ while (index < _keys.length && _states[index] != FULL) {
+ index++;
+ }
+ return index;
+ }
+
+ public boolean hasNext() {
+ return nextEntry < _keys.length;
+ }
+
+ public int next() {
+ if (!hasNext()) {
+ return -1;
+ }
+ int curEntry = nextEntry;
+ this.lastEntry = curEntry;
+ this.nextEntry = nextEntry(curEntry + 1);
+ return curEntry;
+ }
+
+ public int getKey() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _keys[lastEntry];
+ }
+
+ public V getValue() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _values[lastEntry];
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java
new file mode 100644
index 0000000..dcb64d1
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/IntOpenHashTable.java
@@ -0,0 +1,404 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import hivemall.utils.math.Primes;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+import java.util.HashMap;
+
+import javax.annotation.Nonnull;
+
+/**
+ * An open-addressing hash table with double-hashing that requires less memory to {@link HashMap}.
+ */
+public final class IntOpenHashTable<V> implements Externalizable {
+
+ public static final float DEFAULT_LOAD_FACTOR = 0.7f;
+ public static final float DEFAULT_GROW_FACTOR = 2.0f;
+
+ public static final byte FREE = 0;
+ public static final byte FULL = 1;
+ public static final byte REMOVED = 2;
+
+ protected/* final */float _loadFactor;
+ protected/* final */float _growFactor;
+
+ protected int _used = 0;
+ protected int _threshold;
+
+ protected int[] _keys;
+ protected V[] _values;
+ protected byte[] _states;
+
+ public IntOpenHashTable() {} // for Externalizable
+
+ public IntOpenHashTable(int size) {
+ this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR);
+ }
+
+ @SuppressWarnings("unchecked")
+ public IntOpenHashTable(int size, float loadFactor, float growFactor) {
+ if (size < 1) {
+ throw new IllegalArgumentException();
+ }
+ this._loadFactor = loadFactor;
+ this._growFactor = growFactor;
+ int actualSize = Primes.findLeastPrimeNumber(size);
+ this._keys = new int[actualSize];
+ this._values = (V[]) new Object[actualSize];
+ this._states = new byte[actualSize];
+ this._threshold = Math.round(actualSize * _loadFactor);
+ }
+
+ public IntOpenHashTable(@Nonnull int[] keys, @Nonnull V[] values, @Nonnull byte[] states,
+ int used) {
+ this._used = used;
+ this._threshold = keys.length;
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ }
+
+ public int[] getKeys() {
+ return _keys;
+ }
+
+ public Object[] getValues() {
+ return _values;
+ }
+
+ public byte[] getStates() {
+ return _states;
+ }
+
+ public boolean containsKey(final int key) {
+ return findKey(key) >= 0;
+ }
+
+ public V get(final int key) {
+ final int i = findKey(key);
+ if (i < 0) {
+ return null;
+ }
+ return _values[i];
+ }
+
+ public V put(final int key, final V value) {
+ final int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ final int[] keys = _keys;
+ final V[] values = _values;
+ final byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {
+ if (keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ ++_used;
+ return null;
+ }
+
+
+ /** Return weather the required slot is free for new entry */
+ protected boolean isFree(int index, int key) {
+ byte stat = _states[index];
+ if (stat == FREE) {
+ return true;
+ }
+ if (stat == REMOVED && _keys[index] == key) {
+ return true;
+ }
+ return false;
+ }
+
+ /** @return expanded or not */
+ protected boolean preAddEntry(int index) {
+ if ((_used + 1) >= _threshold) {// filled enough
+ int newCapacity = Math.round(_keys.length * _growFactor);
+ ensureCapacity(newCapacity);
+ return true;
+ }
+ return false;
+ }
+
+ protected int findKey(final int key) {
+ final int[] keys = _keys;
+ final byte[] states = _states;
+ final int keyLength = keys.length;
+
+ final int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ return -1;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ }
+ }
+ return -1;
+ }
+
+ public V remove(final int key) {
+ final int[] keys = _keys;
+ final V[] values = _values;
+ final byte[] states = _states;
+ final int keyLength = keys.length;
+
+ final int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ // second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (states[keyIdx] == FREE) {
+ return null;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ V old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ }
+ }
+ return null;
+ }
+
+ @Nonnull
+ public IMapIterator<V> entries() {
+ return new MapIterator();
+ }
+
+ public int size() {
+ return _used;
+ }
+
+ public int capacity() {
+ return _keys.length;
+ }
+
+ public void clear() {
+ Arrays.fill(_states, FREE);
+ this._used = 0;
+ }
+
+ protected void ensureCapacity(int newCapacity) {
+ int prime = Primes.findLeastPrimeNumber(newCapacity);
+ rehash(prime);
+ this._threshold = Math.round(prime * _loadFactor);
+ }
+
+ @SuppressWarnings("unchecked")
+ private void rehash(int newCapacity) {
+ int oldCapacity = _keys.length;
+ if (newCapacity <= oldCapacity) {
+ throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity);
+ }
+ final int[] newkeys = new int[newCapacity];
+ final V[] newValues = (V[]) new Object[newCapacity];
+ final byte[] newStates = new byte[newCapacity];
+ int used = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (_states[i] == FULL) {
+ used++;
+ int k = _keys[i];
+ V v = _values[i];
+ int hash = keyHash(k);
+ int keyIdx = hash % newCapacity;
+ if (newStates[keyIdx] == FULL) {// second hashing
+ int decr = 1 + (hash % (newCapacity - 2));
+ while (newStates[keyIdx] != FREE) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += newCapacity;
+ }
+ }
+ }
+ newStates[keyIdx] = FULL;
+ newkeys[keyIdx] = k;
+ newValues[keyIdx] = v;
+ }
+ }
+ this._keys = newkeys;
+ this._values = newValues;
+ this._states = newStates;
+ this._used = used;
+ }
+
+ private static int keyHash(final int key) {
+ return key & 0x7fffffff;
+ }
+
+ @Override
+ public void writeExternal(ObjectOutput out) throws IOException {
+ out.writeFloat(_loadFactor);
+ out.writeFloat(_growFactor);
+ out.writeInt(_used);
+
+ final int size = _keys.length;
+ out.writeInt(size);
+
+ for (int i = 0; i < size; i++) {
+ out.writeInt(_keys[i]);
+ out.writeObject(_values[i]);
+ out.writeByte(_states[i]);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ this._loadFactor = in.readFloat();
+ this._growFactor = in.readFloat();
+ this._used = in.readInt();
+
+ final int size = in.readInt();
+ final int[] keys = new int[size];
+ final Object[] values = new Object[size];
+ final byte[] states = new byte[size];
+ for (int i = 0; i < size; i++) {
+ keys[i] = in.readInt();
+ values[i] = in.readObject();
+ states[i] = in.readByte();
+ }
+ this._threshold = size;
+ this._keys = keys;
+ this._values = (V[]) values;
+ this._states = states;
+ }
+
+ public interface IMapIterator<V> {
+
+ public boolean hasNext();
+
+ /**
+ * @return -1 if not found
+ */
+ public int next();
+
+ public int getKey();
+
+ public V getValue();
+
+ }
+
+ private final class MapIterator implements IMapIterator<V> {
+
+ int nextEntry;
+ int lastEntry = -1;
+
+ MapIterator() {
+ this.nextEntry = nextEntry(0);
+ }
+
+ /** find the index of next full entry */
+ int nextEntry(int index) {
+ while (index < _keys.length && _states[index] != FULL) {
+ index++;
+ }
+ return index;
+ }
+
+ public boolean hasNext() {
+ return nextEntry < _keys.length;
+ }
+
+ public int next() {
+ if (!hasNext()) {
+ return -1;
+ }
+ int curEntry = nextEntry;
+ this.lastEntry = curEntry;
+ this.nextEntry = nextEntry(curEntry + 1);
+ return curEntry;
+ }
+
+ public int getKey() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _keys[lastEntry];
+ }
+
+ public V getValue() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _values[lastEntry];
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java b/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java
new file mode 100644
index 0000000..84679c7
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/LRUMap.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+public class LRUMap<K, V> extends LinkedHashMap<K, V> {
+ private static final long serialVersionUID = -7708264099645977733L;
+
+ private final int cacheSize;
+
+ public LRUMap(int cacheSize) {
+ this(cacheSize, 0.75f, cacheSize);
+ }
+
+ public LRUMap(int capacity, float loadFactor, int cacheSize) {
+ super(capacity, loadFactor, true);
+ this.cacheSize = cacheSize;
+ }
+
+ protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
+ return size() > cacheSize;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/8dc3a024/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java b/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java
new file mode 100644
index 0000000..c758824
--- /dev/null
+++ b/core/src/main/java/hivemall/utils/collections/maps/Long2DoubleOpenHashTable.java
@@ -0,0 +1,445 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package hivemall.utils.collections.maps;
+
+import hivemall.utils.math.Primes;
+
+import java.io.Externalizable;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.Arrays;
+
+/**
+ * An open-addressing hash table with double hashing
+ *
+ * @see http://en.wikipedia.org/wiki/Double_hashing
+ */
+public final class Long2DoubleOpenHashTable implements Externalizable {
+
+ protected static final byte FREE = 0;
+ protected static final byte FULL = 1;
+ protected static final byte REMOVED = 2;
+
+ private static final float DEFAULT_LOAD_FACTOR = 0.7f;
+ private static final float DEFAULT_GROW_FACTOR = 2.0f;
+
+ protected final transient float _loadFactor;
+ protected final transient float _growFactor;
+
+ protected int _used = 0;
+ protected int _threshold;
+ protected double defaultReturnValue = 0.d;
+
+ protected long[] _keys;
+ protected double[] _values;
+ protected byte[] _states;
+
+ protected Long2DoubleOpenHashTable(int size, float loadFactor, float growFactor,
+ boolean forcePrime) {
+ if (size < 1) {
+ throw new IllegalArgumentException();
+ }
+ this._loadFactor = loadFactor;
+ this._growFactor = growFactor;
+ int actualSize = forcePrime ? Primes.findLeastPrimeNumber(size) : size;
+ this._keys = new long[actualSize];
+ this._values = new double[actualSize];
+ this._states = new byte[actualSize];
+ this._threshold = (int) (actualSize * _loadFactor);
+ }
+
+ public Long2DoubleOpenHashTable(int size, int loadFactor, int growFactor) {
+ this(size, loadFactor, growFactor, true);
+ }
+
+ public Long2DoubleOpenHashTable(int size) {
+ this(size, DEFAULT_LOAD_FACTOR, DEFAULT_GROW_FACTOR, true);
+ }
+
+ public Long2DoubleOpenHashTable() {// required for serialization
+ this._loadFactor = DEFAULT_LOAD_FACTOR;
+ this._growFactor = DEFAULT_GROW_FACTOR;
+ }
+
+ public void defaultReturnValue(double v) {
+ this.defaultReturnValue = v;
+ }
+
+ public boolean containsKey(final long key) {
+ return _findKey(key) >= 0;
+ }
+
+ /**
+ * @return defaultReturnValue if not found
+ */
+ public double get(final long key) {
+ return get(key, defaultReturnValue);
+ }
+
+ public double get(final long key, final double defaultValue) {
+ final int i = _findKey(key);
+ if (i < 0) {
+ return defaultValue;
+ }
+ return _values[i];
+ }
+
+ public double _get(final int index) {
+ if (index < 0) {
+ return defaultReturnValue;
+ }
+ return _values[index];
+ }
+
+ public double _set(final int index, final double value) {
+ double old = _values[index];
+ _values[index] = value;
+ return old;
+ }
+
+ public double _remove(final int index) {
+ _states[index] = REMOVED;
+ --_used;
+ return _values[index];
+ }
+
+ public double put(final long key, final double value) {
+ return put(key, value, defaultReturnValue);
+ }
+
+ public double put(final long key, final double value, final double defaultValue) {
+ final int hash = keyHash(key);
+ int keyLength = _keys.length;
+ int keyIdx = hash % keyLength;
+
+ boolean expanded = preAddEntry(keyIdx);
+ if (expanded) {
+ keyLength = _keys.length;
+ keyIdx = hash % keyLength;
+ }
+
+ final long[] keys = _keys;
+ final double[] values = _values;
+ final byte[] states = _states;
+
+ if (states[keyIdx] == FULL) {// double hashing
+ if (keys[keyIdx] == key) {
+ double old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ break;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ double old = values[keyIdx];
+ values[keyIdx] = value;
+ return old;
+ }
+ }
+ }
+ keys[keyIdx] = key;
+ values[keyIdx] = value;
+ states[keyIdx] = FULL;
+ ++_used;
+ return defaultValue;
+ }
+
+ /** Return weather the required slot is free for new entry */
+ protected boolean isFree(final int index, final long key) {
+ final byte stat = _states[index];
+ if (stat == FREE) {
+ return true;
+ }
+ if (stat == REMOVED && _keys[index] == key) {
+ return true;
+ }
+ return false;
+ }
+
+ /** @return expanded or not */
+ protected boolean preAddEntry(final int index) {
+ if ((_used + 1) >= _threshold) {// too filled
+ int newCapacity = Math.round(_keys.length * _growFactor);
+ ensureCapacity(newCapacity);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * @return -1 if not found
+ */
+ public int _findKey(final long key) {
+ final long[] keys = _keys;
+ final byte[] states = _states;
+ final int keyLength = keys.length;
+
+ final int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ // try second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (isFree(keyIdx, key)) {
+ return -1;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ return keyIdx;
+ }
+ }
+ }
+ return -1;
+ }
+
+ public double remove(final long key) {
+ final long[] keys = _keys;
+ final double[] values = _values;
+ final byte[] states = _states;
+ final int keyLength = keys.length;
+
+ final int hash = keyHash(key);
+ int keyIdx = hash % keyLength;
+ if (states[keyIdx] != FREE) {
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ double old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ // second hash
+ int decr = 1 + (hash % (keyLength - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keyLength;
+ }
+ if (states[keyIdx] == FREE) {
+ return defaultReturnValue;
+ }
+ if (states[keyIdx] == FULL && keys[keyIdx] == key) {
+ double old = values[keyIdx];
+ states[keyIdx] = REMOVED;
+ --_used;
+ return old;
+ }
+ }
+ }
+ return defaultReturnValue;
+ }
+
+ public int size() {
+ return _used;
+ }
+
+ public void clear() {
+ Arrays.fill(_states, FREE);
+ this._used = 0;
+ }
+
+ public IMapIterator entries() {
+ return new MapIterator();
+ }
+
+ @Override
+ public String toString() {
+ int len = size() * 10 + 2;
+ StringBuilder buf = new StringBuilder(len);
+ buf.append('{');
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ buf.append(i.getKey());
+ buf.append('=');
+ buf.append(i.getValue());
+ if (i.hasNext()) {
+ buf.append(',');
+ }
+ }
+ buf.append('}');
+ return buf.toString();
+ }
+
+ protected void ensureCapacity(final int newCapacity) {
+ int prime = Primes.findLeastPrimeNumber(newCapacity);
+ rehash(prime);
+ this._threshold = Math.round(prime * _loadFactor);
+ }
+
+ private void rehash(final int newCapacity) {
+ int oldCapacity = _keys.length;
+ if (newCapacity <= oldCapacity) {
+ throw new IllegalArgumentException("new: " + newCapacity + ", old: " + oldCapacity);
+ }
+ final long[] newkeys = new long[newCapacity];
+ final double[] newValues = new double[newCapacity];
+ final byte[] newStates = new byte[newCapacity];
+ int used = 0;
+ for (int i = 0; i < oldCapacity; i++) {
+ if (_states[i] == FULL) {
+ used++;
+ long k = _keys[i];
+ double v = _values[i];
+ int hash = keyHash(k);
+ int keyIdx = hash % newCapacity;
+ if (newStates[keyIdx] == FULL) {// second hashing
+ int decr = 1 + (hash % (newCapacity - 2));
+ while (newStates[keyIdx] != FREE) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += newCapacity;
+ }
+ }
+ }
+ newkeys[keyIdx] = k;
+ newValues[keyIdx] = v;
+ newStates[keyIdx] = FULL;
+ }
+ }
+ this._keys = newkeys;
+ this._values = newValues;
+ this._states = newStates;
+ this._used = used;
+ }
+
+ private static int keyHash(final long key) {
+ return (int) (key ^ (key >>> 32)) & 0x7FFFFFFF;
+ }
+
+ public void writeExternal(ObjectOutput out) throws IOException {
+ out.writeInt(_threshold);
+ out.writeInt(_used);
+
+ out.writeInt(_keys.length);
+ IMapIterator i = entries();
+ while (i.next() != -1) {
+ out.writeLong(i.getKey());
+ out.writeDouble(i.getValue());
+ }
+ }
+
+ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
+ this._threshold = in.readInt();
+ this._used = in.readInt();
+
+ final int keylen = in.readInt();
+ final long[] keys = new long[keylen];
+ final double[] values = new double[keylen];
+ final byte[] states = new byte[keylen];
+ for (int i = 0; i < _used; i++) {
+ long k = in.readLong();
+ double v = in.readDouble();
+ int hash = keyHash(k);
+ int keyIdx = hash % keylen;
+ if (states[keyIdx] != FREE) {// second hash
+ int decr = 1 + (hash % (keylen - 2));
+ for (;;) {
+ keyIdx -= decr;
+ if (keyIdx < 0) {
+ keyIdx += keylen;
+ }
+ if (states[keyIdx] == FREE) {
+ break;
+ }
+ }
+ }
+ states[keyIdx] = FULL;
+ keys[keyIdx] = k;
+ values[keyIdx] = v;
+ }
+ this._keys = keys;
+ this._values = values;
+ this._states = states;
+ }
+
+ public interface IMapIterator {
+
+ public boolean hasNext();
+
+ /**
+ * @return -1 if not found
+ */
+ public int next();
+
+ public long getKey();
+
+ public double getValue();
+
+ }
+
+ private final class MapIterator implements IMapIterator {
+
+ int nextEntry;
+ int lastEntry = -1;
+
+ MapIterator() {
+ this.nextEntry = nextEntry(0);
+ }
+
+ /** find the index of next full entry */
+ int nextEntry(int index) {
+ while (index < _keys.length && _states[index] != FULL) {
+ index++;
+ }
+ return index;
+ }
+
+ public boolean hasNext() {
+ return nextEntry < _keys.length;
+ }
+
+ public int next() {
+ if (!hasNext()) {
+ return -1;
+ }
+ int curEntry = nextEntry;
+ this.lastEntry = curEntry;
+ this.nextEntry = nextEntry(curEntry + 1);
+ return curEntry;
+ }
+
+ public long getKey() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _keys[lastEntry];
+ }
+
+ public double getValue() {
+ if (lastEntry == -1) {
+ throw new IllegalStateException();
+ }
+ return _values[lastEntry];
+ }
+ }
+}