You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pa...@apache.org on 2015/04/01 20:08:17 UTC
[46/51] [partial] mahout git commit: MAHOUT-1655 Refactors mr-legacy
into mahout-hdfs and mahout-mr, closes apache/mahout#86
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
new file mode 100644
index 0000000..fde8958
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
@@ -0,0 +1,661 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+import java.util.AbstractCollection;
+import java.util.AbstractSet;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.mahout.common.RandomUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * @see FastMap
+ * @see FastIDSet
+ */
+public final class FastByIDMap<V> implements Serializable, Cloneable {
+
+ public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
+ private static final float DEFAULT_LOAD_FACTOR = 1.5f;
+
+ /** Dummy object used to represent a key that has been removed. */
+ private static final long REMOVED = Long.MAX_VALUE;
+ private static final long NULL = Long.MIN_VALUE;
+
+ private long[] keys;
+ private V[] values;
+ private float loadFactor;
+ private int numEntries;
+ private int numSlotsUsed;
+ private final int maxSize;
+ private BitSet recentlyAccessed;
+ private final boolean countingAccesses;
+
+ /** Creates a new {@link FastByIDMap} with default capacity. */
+ public FastByIDMap() {
+ this(2, NO_MAX_SIZE);
+ }
+
+ public FastByIDMap(int size) {
+ this(size, NO_MAX_SIZE);
+ }
+
+ public FastByIDMap(int size, float loadFactor) {
+ this(size, NO_MAX_SIZE, loadFactor);
+ }
+
+ public FastByIDMap(int size, int maxSize) {
+ this(size, maxSize, DEFAULT_LOAD_FACTOR);
+ }
+
+ /**
+ * Creates a new {@link FastByIDMap} whose capacity can accommodate the given number of entries without rehash.
+ *
+ * @param size desired capacity
+ * @param maxSize max capacity
+ * @param loadFactor ratio of internal hash table size to current size
+ * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
+ * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
+ * loadFactor is less than 1
+ */
+ public FastByIDMap(int size, int maxSize, float loadFactor) {
+ Preconditions.checkArgument(size >= 0, "size must be at least 0");
+ Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+ this.loadFactor = loadFactor;
+ int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
+ Preconditions.checkArgument(size < max, "size must be less than " + max);
+ Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
+ int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
+ keys = new long[hashSize];
+ Arrays.fill(keys, NULL);
+ values = (V[]) new Object[hashSize];
+ this.maxSize = maxSize;
+ this.countingAccesses = maxSize != Integer.MAX_VALUE;
+ this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
+ }
+
+ /**
+ * @see #findForAdd(long)
+ */
+ private int find(long key) {
+ int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
+ long[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ long currentKey = keys[index];
+ while (currentKey != NULL && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ return index;
+ }
+
+ /**
+ * @see #find(long)
+ */
+ private int findForAdd(long key) {
+ int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
+ long[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ long currentKey = keys[index];
+ while (currentKey != NULL && currentKey != REMOVED && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ if (currentKey != REMOVED) {
+ return index;
+ }
+ // If we're adding, it's here, but, the key might have a value already later
+ int addIndex = index;
+ while (currentKey != NULL && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ return key == currentKey ? index : addIndex;
+ }
+
+ public V get(long key) {
+ if (key == NULL) {
+ return null;
+ }
+ int index = find(key);
+ if (countingAccesses) {
+ recentlyAccessed.set(index);
+ }
+ return values[index];
+ }
+
+ public int size() {
+ return numEntries;
+ }
+
+ public boolean isEmpty() {
+ return numEntries == 0;
+ }
+
+ public boolean containsKey(long key) {
+ return key != NULL && key != REMOVED && keys[find(key)] != NULL;
+ }
+
+ public boolean containsValue(Object value) {
+ if (value == null) {
+ return false;
+ }
+ for (V theValue : values) {
+ if (theValue != null && value.equals(theValue)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ public V put(long key, V value) {
+ Preconditions.checkArgument(key != NULL && key != REMOVED);
+ Preconditions.checkNotNull(value);
+ // If less than half the slots are open, let's clear it up
+ if (numSlotsUsed * loadFactor >= keys.length) {
+ // If over half the slots used are actual entries, let's grow
+ if (numEntries * loadFactor >= numSlotsUsed) {
+ growAndRehash();
+ } else {
+ // Otherwise just rehash to clear REMOVED entries and don't grow
+ rehash();
+ }
+ }
+ // Here we may later consider implementing Brent's variation described on page 532
+ int index = findForAdd(key);
+ long keyIndex = keys[index];
+ if (keyIndex == key) {
+ V oldValue = values[index];
+ values[index] = value;
+ return oldValue;
+ }
+ // If size is limited,
+ if (countingAccesses && numEntries >= maxSize) {
+ // and we're too large, clear some old-ish entry
+ clearStaleEntry(index);
+ }
+ keys[index] = key;
+ values[index] = value;
+ numEntries++;
+ if (keyIndex == NULL) {
+ numSlotsUsed++;
+ }
+ return null;
+ }
+
+ private void clearStaleEntry(int index) {
+ while (true) {
+ long currentKey;
+ do {
+ if (index == 0) {
+ index = keys.length - 1;
+ } else {
+ index--;
+ }
+ currentKey = keys[index];
+ } while (currentKey == NULL || currentKey == REMOVED);
+ if (recentlyAccessed.get(index)) {
+ recentlyAccessed.clear(index);
+ } else {
+ break;
+ }
+ }
+ // Delete the entry
+ keys[index] = REMOVED;
+ numEntries--;
+ values[index] = null;
+ }
+
+ public V remove(long key) {
+ if (key == NULL || key == REMOVED) {
+ return null;
+ }
+ int index = find(key);
+ if (keys[index] == NULL) {
+ return null;
+ } else {
+ keys[index] = REMOVED;
+ numEntries--;
+ V oldValue = values[index];
+ values[index] = null;
+ // don't decrement numSlotsUsed
+ return oldValue;
+ }
+ // Could un-set recentlyAccessed's bit but doesn't matter
+ }
+
+ public void clear() {
+ numEntries = 0;
+ numSlotsUsed = 0;
+ Arrays.fill(keys, NULL);
+ Arrays.fill(values, null);
+ if (countingAccesses) {
+ recentlyAccessed.clear();
+ }
+ }
+
+ public LongPrimitiveIterator keySetIterator() {
+ return new KeyIterator();
+ }
+
+ public Set<Map.Entry<Long,V>> entrySet() {
+ return new EntrySet();
+ }
+
+ public Collection<V> values() {
+ return new ValueCollection();
+ }
+
+ public void rehash() {
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
+ }
+
+ private void growAndRehash() {
+ if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+ throw new IllegalStateException("Can't grow any more");
+ }
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
+ }
+
+ private void rehash(int newHashSize) {
+ long[] oldKeys = keys;
+ V[] oldValues = values;
+ numEntries = 0;
+ numSlotsUsed = 0;
+ if (countingAccesses) {
+ recentlyAccessed = new BitSet(newHashSize);
+ }
+ keys = new long[newHashSize];
+ Arrays.fill(keys, NULL);
+ values = (V[]) new Object[newHashSize];
+ int length = oldKeys.length;
+ for (int i = 0; i < length; i++) {
+ long key = oldKeys[i];
+ if (key != NULL && key != REMOVED) {
+ put(key, oldValues[i]);
+ }
+ }
+ }
+
+ void iteratorRemove(int lastNext) {
+ if (lastNext >= values.length) {
+ throw new NoSuchElementException();
+ }
+ if (lastNext < 0) {
+ throw new IllegalStateException();
+ }
+ values[lastNext] = null;
+ keys[lastNext] = REMOVED;
+ numEntries--;
+ }
+
+ @Override
+ public FastByIDMap<V> clone() {
+ FastByIDMap<V> clone;
+ try {
+ clone = (FastByIDMap<V>) super.clone();
+ } catch (CloneNotSupportedException cnse) {
+ throw new AssertionError();
+ }
+ clone.keys = keys.clone();
+ clone.values = values.clone();
+ clone.recentlyAccessed = countingAccesses ? new BitSet(keys.length) : null;
+ return clone;
+ }
+
+ @Override
+ public String toString() {
+ if (isEmpty()) {
+ return "{}";
+ }
+ StringBuilder result = new StringBuilder();
+ result.append('{');
+ for (int i = 0; i < keys.length; i++) {
+ long key = keys[i];
+ if (key != NULL && key != REMOVED) {
+ result.append(key).append('=').append(values[i]).append(',');
+ }
+ }
+ result.setCharAt(result.length() - 1, '}');
+ return result.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = 0;
+ long[] keys = this.keys;
+ int max = keys.length;
+ for (int i = 0; i < max; i++) {
+ long key = keys[i];
+ if (key != NULL && key != REMOVED) {
+ hash = 31 * hash + ((int) (key >> 32) ^ (int) key);
+ hash = 31 * hash + values[i].hashCode();
+ }
+ }
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof FastByIDMap)) {
+ return false;
+ }
+ FastByIDMap<V> otherMap = (FastByIDMap<V>) other;
+ long[] otherKeys = otherMap.keys;
+ V[] otherValues = otherMap.values;
+ int length = keys.length;
+ int otherLength = otherKeys.length;
+ int max = Math.min(length, otherLength);
+
+ int i = 0;
+ while (i < max) {
+ long key = keys[i];
+ long otherKey = otherKeys[i];
+ if (key == NULL || key == REMOVED) {
+ if (otherKey != NULL && otherKey != REMOVED) {
+ return false;
+ }
+ } else {
+ if (key != otherKey || !values[i].equals(otherValues[i])) {
+ return false;
+ }
+ }
+ i++;
+ }
+ while (i < length) {
+ long key = keys[i];
+ if (key != NULL && key != REMOVED) {
+ return false;
+ }
+ i++;
+ }
+ while (i < otherLength) {
+ long key = otherKeys[i];
+ if (key != NULL && key != REMOVED) {
+ return false;
+ }
+ i++;
+ }
+ return true;
+ }
+
+ private final class KeyIterator extends AbstractLongPrimitiveIterator {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < keys.length;
+ }
+
+ @Override
+ public long nextLong() {
+ goToNext();
+ lastNext = position;
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return keys[position++];
+ }
+
+ @Override
+ public long peek() {
+ goToNext();
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return keys[position];
+ }
+
+ private void goToNext() {
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+
+ @Override
+ public void skip(int n) {
+ position += n;
+ }
+
+ }
+
+ private final class EntrySet extends AbstractSet<Map.Entry<Long,V>> {
+
+ @Override
+ public int size() {
+ return FastByIDMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastByIDMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return containsKey((Long) o);
+ }
+
+ @Override
+ public Iterator<Map.Entry<Long,V>> iterator() {
+ return new EntryIterator();
+ }
+
+ @Override
+ public boolean add(Map.Entry<Long,V> t) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends Map.Entry<Long,V>> ts) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastByIDMap.this.clear();
+ }
+
+ private final class MapEntry implements Map.Entry<Long,V> {
+
+ private final int index;
+
+ private MapEntry(int index) {
+ this.index = index;
+ }
+
+ @Override
+ public Long getKey() {
+ return keys[index];
+ }
+
+ @Override
+ public V getValue() {
+ return values[index];
+ }
+
+ @Override
+ public V setValue(V value) {
+ Preconditions.checkArgument(value != null);
+
+ V oldValue = values[index];
+ values[index] = value;
+ return oldValue;
+ }
+ }
+
+ private final class EntryIterator implements Iterator<Map.Entry<Long,V>> {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < keys.length;
+ }
+
+ @Override
+ public Map.Entry<Long,V> next() {
+ goToNext();
+ lastNext = position;
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return new MapEntry(position++);
+ }
+
+ private void goToNext() {
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+ }
+
+ }
+
+ private final class ValueCollection extends AbstractCollection<V> {
+
+ @Override
+ public int size() {
+ return FastByIDMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastByIDMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return containsValue(o);
+ }
+
+ @Override
+ public Iterator<V> iterator() {
+ return new ValueIterator();
+ }
+
+ @Override
+ public boolean add(V v) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends V> vs) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastByIDMap.this.clear();
+ }
+
+ private final class ValueIterator implements Iterator<V> {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < values.length;
+ }
+
+ @Override
+ public V next() {
+ goToNext();
+ lastNext = position;
+ if (position >= values.length) {
+ throw new NoSuchElementException();
+ }
+ return values[position++];
+ }
+
+ private void goToNext() {
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
new file mode 100644
index 0000000..5908270
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
@@ -0,0 +1,426 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.mahout.common.RandomUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * @see FastByIDMap
+ */
+public final class FastIDSet implements Serializable, Cloneable, Iterable<Long> {
+
+ private static final float DEFAULT_LOAD_FACTOR = 1.5f;
+
+ /** Dummy object used to represent a key that has been removed. */
+ private static final long REMOVED = Long.MAX_VALUE;
+ private static final long NULL = Long.MIN_VALUE;
+
+ private long[] keys;
+ private float loadFactor;
+ private int numEntries;
+ private int numSlotsUsed;
+
+ /** Creates a new {@link FastIDSet} with default capacity. */
+ public FastIDSet() {
+ this(2);
+ }
+
+ public FastIDSet(long[] initialKeys) {
+ this(initialKeys.length);
+ addAll(initialKeys);
+ }
+
+ public FastIDSet(int size) {
+ this(size, DEFAULT_LOAD_FACTOR);
+ }
+
+ public FastIDSet(int size, float loadFactor) {
+ Preconditions.checkArgument(size >= 0, "size must be at least 0");
+ Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+ this.loadFactor = loadFactor;
+ int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
+ Preconditions.checkArgument(size < max, "size must be less than %d", max);
+ int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
+ keys = new long[hashSize];
+ Arrays.fill(keys, NULL);
+ }
+
+ /**
+ * @see #findForAdd(long)
+ */
+ private int find(long key) {
+ int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
+ long[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ long currentKey = keys[index];
+ while (currentKey != NULL && key != currentKey) { // note: true when currentKey == REMOVED
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ return index;
+ }
+
+ /**
+ * @see #find(long)
+ */
+ private int findForAdd(long key) {
+ int theHashCode = (int) key & 0x7FFFFFFF; // make sure it's positive
+ long[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ long currentKey = keys[index];
+ while (currentKey != NULL && currentKey != REMOVED && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ if (currentKey != REMOVED) {
+ return index;
+ }
+ // If we're adding, it's here, but, the key might have a value already later
+ int addIndex = index;
+ while (currentKey != NULL && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ return key == currentKey ? index : addIndex;
+ }
+
+ public int size() {
+ return numEntries;
+ }
+
+ public boolean isEmpty() {
+ return numEntries == 0;
+ }
+
+ public boolean contains(long key) {
+ return key != NULL && key != REMOVED && keys[find(key)] != NULL;
+ }
+
+ public boolean add(long key) {
+ Preconditions.checkArgument(key != NULL && key != REMOVED);
+
+ // If less than half the slots are open, let's clear it up
+ if (numSlotsUsed * loadFactor >= keys.length) {
+ // If over half the slots used are actual entries, let's grow
+ if (numEntries * loadFactor >= numSlotsUsed) {
+ growAndRehash();
+ } else {
+ // Otherwise just rehash to clear REMOVED entries and don't grow
+ rehash();
+ }
+ }
+ // Here we may later consider implementing Brent's variation described on page 532
+ int index = findForAdd(key);
+ long keyIndex = keys[index];
+ if (keyIndex != key) {
+ keys[index] = key;
+ numEntries++;
+ if (keyIndex == NULL) {
+ numSlotsUsed++;
+ }
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public LongPrimitiveIterator iterator() {
+ return new KeyIterator();
+ }
+
+ public long[] toArray() {
+ long[] result = new long[numEntries];
+ for (int i = 0, position = 0; i < result.length; i++) {
+ while (keys[position] == NULL || keys[position] == REMOVED) {
+ position++;
+ }
+ result[i] = keys[position++];
+ }
+ return result;
+ }
+
+ public boolean remove(long key) {
+ if (key == NULL || key == REMOVED) {
+ return false;
+ }
+ int index = find(key);
+ if (keys[index] == NULL) {
+ return false;
+ } else {
+ keys[index] = REMOVED;
+ numEntries--;
+ return true;
+ }
+ }
+
+ public boolean addAll(long[] c) {
+ boolean changed = false;
+ for (long k : c) {
+ if (add(k)) {
+ changed = true;
+ }
+ }
+ return changed;
+ }
+
+ public boolean addAll(FastIDSet c) {
+ boolean changed = false;
+ for (long k : c.keys) {
+ if (k != NULL && k != REMOVED && add(k)) {
+ changed = true;
+ }
+ }
+ return changed;
+ }
+
+ public boolean removeAll(long[] c) {
+ boolean changed = false;
+ for (long o : c) {
+ if (remove(o)) {
+ changed = true;
+ }
+ }
+ return changed;
+ }
+
+ public boolean removeAll(FastIDSet c) {
+ boolean changed = false;
+ for (long k : c.keys) {
+ if (k != NULL && k != REMOVED && remove(k)) {
+ changed = true;
+ }
+ }
+ return changed;
+ }
+
+ public boolean retainAll(FastIDSet c) {
+ boolean changed = false;
+ for (int i = 0; i < keys.length; i++) {
+ long k = keys[i];
+ if (k != NULL && k != REMOVED && !c.contains(k)) {
+ keys[i] = REMOVED;
+ numEntries--;
+ changed = true;
+ }
+ }
+ return changed;
+ }
+
+ public void clear() {
+ numEntries = 0;
+ numSlotsUsed = 0;
+ Arrays.fill(keys, NULL);
+ }
+
+ private void growAndRehash() {
+ if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+ throw new IllegalStateException("Can't grow any more");
+ }
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
+ }
+
+ public void rehash() {
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
+ }
+
+ private void rehash(int newHashSize) {
+ long[] oldKeys = keys;
+ numEntries = 0;
+ numSlotsUsed = 0;
+ keys = new long[newHashSize];
+ Arrays.fill(keys, NULL);
+ for (long key : oldKeys) {
+ if (key != NULL && key != REMOVED) {
+ add(key);
+ }
+ }
+ }
+
+ /**
+ * Convenience method to quickly compute just the size of the intersection with another {@link FastIDSet}.
+ *
+ * @param other
+ * {@link FastIDSet} to intersect with
+ * @return number of elements in intersection
+ */
+ public int intersectionSize(FastIDSet other) {
+ int count = 0;
+ for (long key : other.keys) {
+ if (key != NULL && key != REMOVED && keys[find(key)] != NULL) {
+ count++;
+ }
+ }
+ return count;
+ }
+
+ @Override
+ public FastIDSet clone() {
+ FastIDSet clone;
+ try {
+ clone = (FastIDSet) super.clone();
+ } catch (CloneNotSupportedException cnse) {
+ throw new AssertionError();
+ }
+ clone.keys = keys.clone();
+ return clone;
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = 0;
+ long[] keys = this.keys;
+ for (long key : keys) {
+ if (key != NULL && key != REMOVED) {
+ hash = 31 * hash + ((int) (key >> 32) ^ (int) key);
+ }
+ }
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof FastIDSet)) {
+ return false;
+ }
+ FastIDSet otherMap = (FastIDSet) other;
+ long[] otherKeys = otherMap.keys;
+ int length = keys.length;
+ int otherLength = otherKeys.length;
+ int max = Math.min(length, otherLength);
+
+ int i = 0;
+ while (i < max) {
+ long key = keys[i];
+ long otherKey = otherKeys[i];
+ if (key == NULL || key == REMOVED) {
+ if (otherKey != NULL && otherKey != REMOVED) {
+ return false;
+ }
+ } else {
+ if (key != otherKey) {
+ return false;
+ }
+ }
+ i++;
+ }
+ while (i < length) {
+ long key = keys[i];
+ if (key != NULL && key != REMOVED) {
+ return false;
+ }
+ i++;
+ }
+ while (i < otherLength) {
+ long key = otherKeys[i];
+ if (key != NULL && key != REMOVED) {
+ return false;
+ }
+ i++;
+ }
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ if (isEmpty()) {
+ return "[]";
+ }
+ StringBuilder result = new StringBuilder();
+ result.append('[');
+ for (long key : keys) {
+ if (key != NULL && key != REMOVED) {
+ result.append(key).append(',');
+ }
+ }
+ result.setCharAt(result.length() - 1, ']');
+ return result.toString();
+ }
+
+ private final class KeyIterator extends AbstractLongPrimitiveIterator {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < keys.length;
+ }
+
+ @Override
+ public long nextLong() {
+ goToNext();
+ lastNext = position;
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return keys[position++];
+ }
+
+ @Override
+ public long peek() {
+ goToNext();
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return keys[position];
+ }
+
+ private void goToNext() {
+ int length = keys.length;
+ while (position < length
+ && (keys[position] == NULL || keys[position] == REMOVED)) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ if (lastNext >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ if (lastNext < 0) {
+ throw new IllegalStateException();
+ }
+ keys[lastNext] = REMOVED;
+ numEntries--;
+ }
+
+ public Iterator<Long> iterator() {
+ return new KeyIterator();
+ }
+
+ @Override
+ public void skip(int n) {
+ position += n;
+ }
+
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
new file mode 100644
index 0000000..7c64b44
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
@@ -0,0 +1,729 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+import java.util.AbstractCollection;
+import java.util.AbstractSet;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.mahout.common.RandomUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * <p>
+ * This is an optimized {@link Map} implementation, based on algorithms described in Knuth's "Art of Computer
+ * Programming", Vol. 3, p. 529.
+ * </p>
+ *
+ * <p>
+ * It should be faster than {@link java.util.HashMap} in some cases, but not all. Its main feature is a
+ * "max size" and the ability to transparently, efficiently and semi-intelligently evict old entries when max
+ * size is exceeded.
+ * </p>
+ *
+ * <p>
+ * This class is not a bit thread-safe.
+ * </p>
+ *
+ * <p>
+ * This implementation does not allow {@code null} as a key or value.
+ * </p>
+ */
+public final class FastMap<K,V> implements Map<K,V>, Serializable, Cloneable {
+
+ public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
+ private static final float DEFAULT_LOAD_FACTOR = 1.5f;
+
+ /** Dummy object used to represent a key that has been removed. */
+ private static final Object REMOVED = new Object();
+
+ private K[] keys;
+ private V[] values;
+ private float loadFactor;
+ private int numEntries;
+ private int numSlotsUsed;
+ private final int maxSize;
+ private BitSet recentlyAccessed;
+ private final boolean countingAccesses;
+
+ /** Creates a new {@link FastMap} with default capacity. */
+ public FastMap() {
+ this(2, NO_MAX_SIZE);
+ }
+
+ public FastMap(int size) {
+ this(size, NO_MAX_SIZE);
+ }
+
+ public FastMap(Map<K,V> other) {
+ this(other.size());
+ putAll(other);
+ }
+
+ public FastMap(int size, float loadFactor) {
+ this(size, NO_MAX_SIZE, loadFactor);
+ }
+
+ public FastMap(int size, int maxSize) {
+ this(size, maxSize, DEFAULT_LOAD_FACTOR);
+ }
+
+ /**
+ * Creates a new whose capacity can accommodate the given number of entries without rehash.
+ *
+ * @param size desired capacity
+ * @param maxSize max capacity
+ * @throws IllegalArgumentException if size is less than 0, maxSize is less than 1
+ * or at least half of {@link RandomUtils#MAX_INT_SMALLER_TWIN_PRIME}, or
+ * loadFactor is less than 1
+ */
+ public FastMap(int size, int maxSize, float loadFactor) {
+ Preconditions.checkArgument(size >= 0, "size must be at least 0");
+ Preconditions.checkArgument(loadFactor >= 1.0f, "loadFactor must be at least 1.0");
+ this.loadFactor = loadFactor;
+ int max = (int) (RandomUtils.MAX_INT_SMALLER_TWIN_PRIME / loadFactor);
+ Preconditions.checkArgument(size < max, "size must be less than " + max);
+ Preconditions.checkArgument(maxSize >= 1, "maxSize must be at least 1");
+ int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
+ keys = (K[]) new Object[hashSize];
+ values = (V[]) new Object[hashSize];
+ this.maxSize = maxSize;
+ this.countingAccesses = maxSize != Integer.MAX_VALUE;
+ this.recentlyAccessed = countingAccesses ? new BitSet(hashSize) : null;
+ }
+
+ private int find(Object key) {
+ int theHashCode = key.hashCode() & 0x7FFFFFFF; // make sure it's positive
+ K[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ K currentKey = keys[index];
+ while (currentKey != null && !key.equals(currentKey)) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ return index;
+ }
+
+ private int findForAdd(Object key) {
+ int theHashCode = key.hashCode() & 0x7FFFFFFF; // make sure it's positive
+ K[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ K currentKey = keys[index];
+ while (currentKey != null && currentKey != REMOVED && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ if (currentKey != REMOVED) {
+ return index;
+ }
+ // If we're adding, it's here, but, the key might have a value already later
+ int addIndex = index;
+ while (currentKey != null && key != currentKey) {
+ index -= index < jump ? jump - hashSize : jump;
+ currentKey = keys[index];
+ }
+ return key == currentKey ? index : addIndex;
+ }
+
+ @Override
+ public V get(Object key) {
+ if (key == null) {
+ return null;
+ }
+ int index = find(key);
+ if (countingAccesses) {
+ recentlyAccessed.set(index);
+ }
+ return values[index];
+ }
+
+ @Override
+ public int size() {
+ return numEntries;
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return numEntries == 0;
+ }
+
+ @Override
+ public boolean containsKey(Object key) {
+ return key != null && keys[find(key)] != null;
+ }
+
+ @Override
+ public boolean containsValue(Object value) {
+ if (value == null) {
+ return false;
+ }
+ for (V theValue : values) {
+ if (theValue != null && value.equals(theValue)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * @throws NullPointerException
+ * if key or value is null
+ */
+ @Override
+ public V put(K key, V value) {
+ Preconditions.checkNotNull(key);
+ Preconditions.checkNotNull(value);
+ // If less than half the slots are open, let's clear it up
+ if (numSlotsUsed * loadFactor >= keys.length) {
+ // If over half the slots used are actual entries, let's grow
+ if (numEntries * loadFactor >= numSlotsUsed) {
+ growAndRehash();
+ } else {
+ // Otherwise just rehash to clear REMOVED entries and don't grow
+ rehash();
+ }
+ }
+ // Here we may later consider implementing Brent's variation described on page 532
+ int index = findForAdd(key);
+ if (keys[index] == key) {
+ V oldValue = values[index];
+ values[index] = value;
+ return oldValue;
+ }
+ // If size is limited,
+ if (countingAccesses && numEntries >= maxSize) {
+ // and we're too large, clear some old-ish entry
+ clearStaleEntry(index);
+ }
+ keys[index] = key;
+ values[index] = value;
+ numEntries++;
+ numSlotsUsed++;
+ return null;
+ }
+
+ private void clearStaleEntry(int index) {
+ while (true) {
+ K currentKey;
+ do {
+ if (index == 0) {
+ index = keys.length - 1;
+ } else {
+ index--;
+ }
+ currentKey = keys[index];
+ } while (currentKey == null || currentKey == REMOVED);
+ if (recentlyAccessed.get(index)) {
+ recentlyAccessed.clear(index);
+ } else {
+ break;
+ }
+ }
+ // Delete the entry
+ ((Object[])keys)[index] = REMOVED;
+ numEntries--;
+ values[index] = null;
+ }
+
+ @Override
+ public void putAll(Map<? extends K,? extends V> map) {
+ for (Entry<? extends K,? extends V> entry : map.entrySet()) {
+ put(entry.getKey(), entry.getValue());
+ }
+ }
+
+ @Override
+ public V remove(Object key) {
+ if (key == null) {
+ return null;
+ }
+ int index = find(key);
+ if (keys[index] == null) {
+ return null;
+ } else {
+ ((Object[])keys)[index] = REMOVED;
+ numEntries--;
+ V oldValue = values[index];
+ values[index] = null;
+ // don't decrement numSlotsUsed
+ return oldValue;
+ }
+ // Could un-set recentlyAccessed's bit but doesn't matter
+ }
+
+ @Override
+ public void clear() {
+ numEntries = 0;
+ numSlotsUsed = 0;
+ Arrays.fill(keys, null);
+ Arrays.fill(values, null);
+ if (countingAccesses) {
+ recentlyAccessed.clear();
+ }
+ }
+
+ @Override
+ public Set<K> keySet() {
+ return new KeySet();
+ }
+
+ @Override
+ public Collection<V> values() {
+ return new ValueCollection();
+ }
+
+ @Override
+ public Set<Entry<K,V>> entrySet() {
+ return new EntrySet();
+ }
+
+ public void rehash() {
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * numEntries)));
+ }
+
+ private void growAndRehash() {
+ if (keys.length * loadFactor >= RandomUtils.MAX_INT_SMALLER_TWIN_PRIME) {
+ throw new IllegalStateException("Can't grow any more");
+ }
+ rehash(RandomUtils.nextTwinPrime((int) (loadFactor * keys.length)));
+ }
+
+ private void rehash(int newHashSize) {
+ K[] oldKeys = keys;
+ V[] oldValues = values;
+ numEntries = 0;
+ numSlotsUsed = 0;
+ if (countingAccesses) {
+ recentlyAccessed = new BitSet(newHashSize);
+ }
+ keys = (K[]) new Object[newHashSize];
+ values = (V[]) new Object[newHashSize];
+ int length = oldKeys.length;
+ for (int i = 0; i < length; i++) {
+ K key = oldKeys[i];
+ if (key != null && key != REMOVED) {
+ put(key, oldValues[i]);
+ }
+ }
+ }
+
+ void iteratorRemove(int lastNext) {
+ if (lastNext >= values.length) {
+ throw new NoSuchElementException();
+ }
+ if (lastNext < 0) {
+ throw new IllegalStateException();
+ }
+ values[lastNext] = null;
+ ((Object[])keys)[lastNext] = REMOVED;
+ numEntries--;
+ }
+
+ @Override
+ public FastMap<K,V> clone() {
+ FastMap<K,V> clone;
+ try {
+ clone = (FastMap<K,V>) super.clone();
+ } catch (CloneNotSupportedException cnse) {
+ throw new AssertionError();
+ }
+ clone.keys = keys.clone();
+ clone.values = values.clone();
+ clone.recentlyAccessed = countingAccesses ? new BitSet(keys.length) : null;
+ return clone;
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = 0;
+ K[] keys = this.keys;
+ int max = keys.length;
+ for (int i = 0; i < max; i++) {
+ K key = keys[i];
+ if (key != null && key != REMOVED) {
+ hash = 31 * hash + key.hashCode();
+ hash = 31 * hash + values[i].hashCode();
+ }
+ }
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (!(other instanceof FastMap)) {
+ return false;
+ }
+ FastMap<K,V> otherMap = (FastMap<K,V>) other;
+ K[] otherKeys = otherMap.keys;
+ V[] otherValues = otherMap.values;
+ int length = keys.length;
+ int otherLength = otherKeys.length;
+ int max = Math.min(length, otherLength);
+
+ int i = 0;
+ while (i < max) {
+ K key = keys[i];
+ K otherKey = otherKeys[i];
+ if (key == null || key == REMOVED) {
+ if (otherKey != null && otherKey != REMOVED) {
+ return false;
+ }
+ } else {
+ if (key != otherKey || !values[i].equals(otherValues[i])) {
+ return false;
+ }
+ }
+ i++;
+ }
+ while (i < length) {
+ K key = keys[i];
+ if (key != null && key != REMOVED) {
+ return false;
+ }
+ i++;
+ }
+ while (i < otherLength) {
+ K key = otherKeys[i];
+ if (key != null && key != REMOVED) {
+ return false;
+ }
+ i++;
+ }
+ return true;
+ }
+
+ @Override
+ public String toString() {
+ if (isEmpty()) {
+ return "{}";
+ }
+ StringBuilder result = new StringBuilder();
+ result.append('{');
+ for (int i = 0; i < keys.length; i++) {
+ K key = keys[i];
+ if (key != null && key != REMOVED) {
+ result.append(key).append('=').append(values[i]).append(',');
+ }
+ }
+ result.setCharAt(result.length() - 1, '}');
+ return result.toString();
+ }
+
+ private final class EntrySet extends AbstractSet<Entry<K,V>> {
+
+ @Override
+ public int size() {
+ return FastMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return containsKey(o);
+ }
+
+ @Override
+ public Iterator<Entry<K,V>> iterator() {
+ return new EntryIterator();
+ }
+
+ @Override
+ public boolean add(Entry<K,V> t) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends Entry<K,V>> ts) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastMap.this.clear();
+ }
+
+ private final class MapEntry implements Entry<K,V> {
+
+ private final int index;
+
+ private MapEntry(int index) {
+ this.index = index;
+ }
+
+ @Override
+ public K getKey() {
+ return keys[index];
+ }
+
+ @Override
+ public V getValue() {
+ return values[index];
+ }
+
+ @Override
+ public V setValue(V value) {
+ Preconditions.checkArgument(value != null);
+ V oldValue = values[index];
+ values[index] = value;
+ return oldValue;
+ }
+ }
+
+ private final class EntryIterator implements Iterator<Entry<K,V>> {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < keys.length;
+ }
+
+ @Override
+ public Entry<K,V> next() {
+ goToNext();
+ lastNext = position;
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return new MapEntry(position++);
+ }
+
+ private void goToNext() {
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+ }
+
+ }
+
+ private final class KeySet extends AbstractSet<K> {
+
+ @Override
+ public int size() {
+ return FastMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return containsKey(o);
+ }
+
+ @Override
+ public Iterator<K> iterator() {
+ return new KeyIterator();
+ }
+
+ @Override
+ public boolean add(K t) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends K> ts) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastMap.this.clear();
+ }
+
+ private final class KeyIterator implements Iterator<K> {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < keys.length;
+ }
+
+ @Override
+ public K next() {
+ goToNext();
+ lastNext = position;
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return keys[position++];
+ }
+
+ private void goToNext() {
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+ }
+
+ }
+
+ private final class ValueCollection extends AbstractCollection<V> {
+
+ @Override
+ public int size() {
+ return FastMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return containsValue(o);
+ }
+
+ @Override
+ public Iterator<V> iterator() {
+ return new ValueIterator();
+ }
+
+ @Override
+ public boolean add(V v) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends V> vs) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastMap.this.clear();
+ }
+
+ private final class ValueIterator implements Iterator<V> {
+
+ private int position;
+ private int lastNext = -1;
+
+ @Override
+ public boolean hasNext() {
+ goToNext();
+ return position < values.length;
+ }
+
+ @Override
+ public V next() {
+ goToNext();
+ lastNext = position;
+ if (position >= values.length) {
+ throw new NoSuchElementException();
+ }
+ return values[position++];
+ }
+
+ private void goToNext() {
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ @Override
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
new file mode 100644
index 0000000..1863d2b
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+/**
+ * <p>
+ * A simple class that represents a fixed value of an average and count. This is useful
+ * when an API needs to return {@link RunningAverage} but is not in a position to accept
+ * updates to it.
+ * </p>
+ */
+public class FixedRunningAverage implements RunningAverage, Serializable {
+
+ private final double average;
+ private final int count;
+
+ public FixedRunningAverage(double average, int count) {
+ this.average = average;
+ this.count = count;
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public synchronized void addDatum(double datum) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public synchronized void removeDatum(double datum) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public synchronized void changeDatum(double delta) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public synchronized int getCount() {
+ return count;
+ }
+
+ @Override
+ public synchronized double getAverage() {
+ return average;
+ }
+
+ @Override
+ public RunningAverage inverse() {
+ return new InvertedRunningAverage(this);
+ }
+
+ @Override
+ public synchronized String toString() {
+ return String.valueOf(average);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
new file mode 100644
index 0000000..619b6b7
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>
+ * A simple class that represents a fixed value of an average, count and standard deviation. This is useful
+ * when an API needs to return {@link RunningAverageAndStdDev} but is not in a position to accept
+ * updates to it.
+ * </p>
+ */
+public final class FixedRunningAverageAndStdDev extends FixedRunningAverage implements RunningAverageAndStdDev {
+
+ private final double stdDev;
+
+ public FixedRunningAverageAndStdDev(double average, double stdDev, int count) {
+ super(average, count);
+ this.stdDev = stdDev;
+ }
+
+ @Override
+ public RunningAverageAndStdDev inverse() {
+ return new InvertedRunningAverageAndStdDev(this);
+ }
+
+ @Override
+ public synchronized String toString() {
+ return super.toString() + ',' + stdDev;
+ }
+
+ @Override
+ public double getStandardDeviation() {
+ return stdDev;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
new file mode 100644
index 0000000..00d828f
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+/**
+ * <p>
+ * A simple class that can keep track of a running average of a series of numbers. One can add to or remove
+ * from the series, as well as update a datum in the series. The class does not actually keep track of the
+ * series of values, just its running average, so it doesn't even matter if you remove/change a value that
+ * wasn't added.
+ * </p>
+ */
+public class FullRunningAverage implements RunningAverage, Serializable {
+
+ private int count;
+ private double average;
+
+ public FullRunningAverage() {
+ this(0, Double.NaN);
+ }
+
+ public FullRunningAverage(int count, double average) {
+ this.count = count;
+ this.average = average;
+ }
+
+ /**
+ * @param datum
+ * new item to add to the running average
+ */
+ @Override
+ public synchronized void addDatum(double datum) {
+ if (++count == 1) {
+ average = datum;
+ } else {
+ average = average * (count - 1) / count + datum / count;
+ }
+ }
+
+ /**
+ * @param datum
+ * item to remove to the running average
+ * @throws IllegalStateException
+ * if count is 0
+ */
+ @Override
+ public synchronized void removeDatum(double datum) {
+ if (count == 0) {
+ throw new IllegalStateException();
+ }
+ if (--count == 0) {
+ average = Double.NaN;
+ } else {
+ average = average * (count + 1) / count - datum / count;
+ }
+ }
+
+ /**
+ * @param delta
+ * amount by which to change a datum in the running average
+ * @throws IllegalStateException
+ * if count is 0
+ */
+ @Override
+ public synchronized void changeDatum(double delta) {
+ if (count == 0) {
+ throw new IllegalStateException();
+ }
+ average += delta / count;
+ }
+
+ @Override
+ public synchronized int getCount() {
+ return count;
+ }
+
+ @Override
+ public synchronized double getAverage() {
+ return average;
+ }
+
+ @Override
+ public RunningAverage inverse() {
+ return new InvertedRunningAverage(this);
+ }
+
+ @Override
+ public synchronized String toString() {
+ return String.valueOf(average);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
new file mode 100644
index 0000000..6212e66
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>
+ * Extends {@link FullRunningAverage} to add a running standard deviation computation.
+ * Uses Welford's method, as described at http://www.johndcook.com/standard_deviation.html
+ * </p>
+ */
+public final class FullRunningAverageAndStdDev extends FullRunningAverage implements RunningAverageAndStdDev {
+
+ private double stdDev;
+ private double mk;
+ private double sk;
+
+ public FullRunningAverageAndStdDev() {
+ mk = 0.0;
+ sk = 0.0;
+ recomputeStdDev();
+ }
+
+ public FullRunningAverageAndStdDev(int count, double average, double mk, double sk) {
+ super(count, average);
+ this.mk = mk;
+ this.sk = sk;
+ recomputeStdDev();
+ }
+
+ public double getMk() {
+ return mk;
+ }
+
+ public double getSk() {
+ return sk;
+ }
+
+ @Override
+ public synchronized double getStandardDeviation() {
+ return stdDev;
+ }
+
+ @Override
+ public synchronized void addDatum(double datum) {
+ super.addDatum(datum);
+ int count = getCount();
+ if (count == 1) {
+ mk = datum;
+ sk = 0.0;
+ } else {
+ double oldmk = mk;
+ double diff = datum - oldmk;
+ mk += diff / count;
+ sk += diff * (datum - mk);
+ }
+ recomputeStdDev();
+ }
+
+ @Override
+ public synchronized void removeDatum(double datum) {
+ int oldCount = getCount();
+ super.removeDatum(datum);
+ double oldmk = mk;
+ mk = (oldCount * oldmk - datum) / (oldCount - 1);
+ sk -= (datum - mk) * (datum - oldmk);
+ recomputeStdDev();
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public void changeDatum(double delta) {
+ throw new UnsupportedOperationException();
+ }
+
+ private synchronized void recomputeStdDev() {
+ int count = getCount();
+ stdDev = count > 1 ? Math.sqrt(sk / (count - 1)) : Double.NaN;
+ }
+
+ @Override
+ public RunningAverageAndStdDev inverse() {
+ return new InvertedRunningAverageAndStdDev(this);
+ }
+
+ @Override
+ public synchronized String toString() {
+ return String.valueOf(String.valueOf(getAverage()) + ',' + stdDev);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
new file mode 100644
index 0000000..0f94c22
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+public final class InvertedRunningAverage implements RunningAverage {
+
+ private final RunningAverage delegate;
+
+ public InvertedRunningAverage(RunningAverage delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public void addDatum(double datum) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void removeDatum(double datum) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void changeDatum(double delta) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getCount() {
+ return delegate.getCount();
+ }
+
+ @Override
+ public double getAverage() {
+ return -delegate.getAverage();
+ }
+
+ @Override
+ public RunningAverage inverse() {
+ return delegate;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
new file mode 100644
index 0000000..147012d
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+public final class InvertedRunningAverageAndStdDev implements RunningAverageAndStdDev {
+
+ private final RunningAverageAndStdDev delegate;
+
+ public InvertedRunningAverageAndStdDev(RunningAverageAndStdDev delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public void addDatum(double datum) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void removeDatum(double datum) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void changeDatum(double delta) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getCount() {
+ return delegate.getCount();
+ }
+
+ @Override
+ public double getAverage() {
+ return -delegate.getAverage();
+ }
+
+ @Override
+ public double getStandardDeviation() {
+ return delegate.getStandardDeviation();
+ }
+
+ @Override
+ public RunningAverageAndStdDev inverse() {
+ return delegate;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
new file mode 100644
index 0000000..5127df0
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.NoSuchElementException;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * While long[] is an Iterable, it is not an Iterable<Long>. This adapter class addresses that.
+ */
+public final class LongPrimitiveArrayIterator implements LongPrimitiveIterator {
+
+ private final long[] array;
+ private int position;
+ private final int max;
+
+ /**
+ * <p>
+ * Creates an {@link LongPrimitiveArrayIterator} over an entire array.
+ * </p>
+ *
+ * @param array
+ * array to iterate over
+ */
+ public LongPrimitiveArrayIterator(long[] array) {
+ this.array = Preconditions.checkNotNull(array); // yeah, not going to copy the array here, for performance
+ this.position = 0;
+ this.max = array.length;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return position < max;
+ }
+
+ @Override
+ public Long next() {
+ return nextLong();
+ }
+
+ @Override
+ public long nextLong() {
+ if (position >= array.length) {
+ throw new NoSuchElementException();
+ }
+ return array[position++];
+ }
+
+ @Override
+ public long peek() {
+ if (position >= array.length) {
+ throw new NoSuchElementException();
+ }
+ return array[position];
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void skip(int n) {
+ if (n > 0) {
+ position += n;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "LongPrimitiveArrayIterator";
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java
new file mode 100644
index 0000000..0840749
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveIterator.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * Adds notion of iterating over {@code long} primitives in the style of an {@link java.util.Iterator} -- as
+ * opposed to iterating over {@link Long}. Implementations of this interface however also implement
+ * {@link java.util.Iterator} and {@link Iterable} over {@link Long} for convenience.
+ */
+public interface LongPrimitiveIterator extends SkippingIterator<Long> {
+
+ /**
+ * @return next {@code long} in iteration
+ * @throws java.util.NoSuchElementException
+ * if no more elements exist in the iteration
+ */
+ long nextLong();
+
+ /**
+ * @return next {@code long} in iteration without advancing iteration
+ */
+ long peek();
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
new file mode 100644
index 0000000..cc91560
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RefreshHelper.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.locks.ReentrantLock;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A helper class for implementing {@link Refreshable}. This object is typically included in an implementation
+ * {@link Refreshable} to implement {@link Refreshable#refresh(Collection)}. It execute the class's own
+ * supplied update logic, after updating all the object's dependencies. This also ensures that dependencies
+ * are not updated multiple times.
+ */
+public final class RefreshHelper implements Refreshable {
+
+ private static final Logger log = LoggerFactory.getLogger(RefreshHelper.class);
+
+ private final List<Refreshable> dependencies;
+ private final ReentrantLock refreshLock;
+ private final Callable<?> refreshRunnable;
+
+ /**
+ * @param refreshRunnable
+ * encapsulates the containing object's own refresh logic
+ */
+ public RefreshHelper(Callable<?> refreshRunnable) {
+ this.dependencies = Lists.newArrayListWithCapacity(3);
+ this.refreshLock = new ReentrantLock();
+ this.refreshRunnable = refreshRunnable;
+ }
+
+ /** Add a dependency to be refreshed first when the encapsulating object does. */
+ public void addDependency(Refreshable refreshable) {
+ if (refreshable != null) {
+ dependencies.add(refreshable);
+ }
+ }
+
+ public void removeDependency(Refreshable refreshable) {
+ if (refreshable != null) {
+ dependencies.remove(refreshable);
+ }
+ }
+
+ /**
+ * Typically this is called in {@link Refreshable#refresh(java.util.Collection)} and is the entire body of
+ * that method.
+ */
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ if (refreshLock.tryLock()) {
+ try {
+ alreadyRefreshed = buildRefreshed(alreadyRefreshed);
+ for (Refreshable dependency : dependencies) {
+ maybeRefresh(alreadyRefreshed, dependency);
+ }
+ if (refreshRunnable != null) {
+ try {
+ refreshRunnable.call();
+ } catch (Exception e) {
+ log.warn("Unexpected exception while refreshing", e);
+ }
+ }
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+ }
+
+ /**
+ * Creates a new and empty {@link Collection} if the method parameter is {@code null}.
+ *
+ * @param currentAlreadyRefreshed
+ * {@link Refreshable}s to refresh later on
+ * @return an empty {@link Collection} if the method param was {@code null} or the unmodified method
+ * param.
+ */
+ public static Collection<Refreshable> buildRefreshed(Collection<Refreshable> currentAlreadyRefreshed) {
+ return currentAlreadyRefreshed == null ? Sets.<Refreshable>newHashSetWithExpectedSize(3) : currentAlreadyRefreshed;
+ }
+
+ /**
+ * Adds the specified {@link Refreshable} to the given collection of {@link Refreshable}s if it is not
+ * already there and immediately refreshes it.
+ *
+ * @param alreadyRefreshed
+ * the collection of {@link Refreshable}s
+ * @param refreshable
+ * the {@link Refreshable} to potentially add and refresh
+ */
+ public static void maybeRefresh(Collection<Refreshable> alreadyRefreshed, Refreshable refreshable) {
+ if (!alreadyRefreshed.contains(refreshable)) {
+ alreadyRefreshed.add(refreshable);
+ log.info("Added refreshable: {}", refreshable);
+ refreshable.refresh(alreadyRefreshed);
+ log.info("Refreshed: {}", alreadyRefreshed);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java
new file mode 100644
index 0000000..40da9de
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/Retriever.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>
+ * Implementations can retrieve a value for a given key.
+ * </p>
+ */
+public interface Retriever<K,V> {
+
+ /**
+ * @param key key for which a value should be retrieved
+ * @return value for key
+ * @throws TasteException if an error occurs while retrieving the value
+ */
+ V get(K key) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
new file mode 100644
index 0000000..bf8e39c
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>
+ * Interface for classes that can keep track of a running average of a series of numbers. One can add to or
+ * remove from the series, as well as update a datum in the series. The class does not actually keep track of
+ * the series of values, just its running average, so it doesn't even matter if you remove/change a value that
+ * wasn't added.
+ * </p>
+ */
+public interface RunningAverage {
+
+ /**
+ * @param datum
+ * new item to add to the running average
+ * @throws IllegalArgumentException
+ * if datum is {@link Double#NaN}
+ */
+ void addDatum(double datum);
+
+ /**
+ * @param datum
+ * item to remove to the running average
+ * @throws IllegalArgumentException
+ * if datum is {@link Double#NaN}
+ * @throws IllegalStateException
+ * if count is 0
+ */
+ void removeDatum(double datum);
+
+ /**
+ * @param delta
+ * amount by which to change a datum in the running average
+ * @throws IllegalArgumentException
+ * if delta is {@link Double#NaN}
+ * @throws IllegalStateException
+ * if count is 0
+ */
+ void changeDatum(double delta);
+
+ int getCount();
+
+ double getAverage();
+
+ /**
+ * @return a (possibly immutable) object whose average is the negative of this object's
+ */
+ RunningAverage inverse();
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
new file mode 100644
index 0000000..4ac6108
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>
+ * Extends {@link RunningAverage} by adding standard deviation too.
+ * </p>
+ */
+public interface RunningAverageAndStdDev extends RunningAverage {
+
+ /** @return standard deviation of data */
+ double getStandardDeviation();
+
+ /**
+ * @return a (possibly immutable) object whose average is the negative of this object's
+ */
+ @Override
+ RunningAverageAndStdDev inverse();
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
new file mode 100644
index 0000000..6da709d
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingLongPrimitiveIterator.java
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.NoSuchElementException;
+
+import com.google.common.base.Preconditions;
+import org.apache.commons.math3.distribution.PascalDistribution;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.RandomWrapper;
+
+/**
+ * Wraps a {@link LongPrimitiveIterator} and returns only some subset of the elements that it would,
+ * as determined by a sampling rate parameter.
+ */
+public final class SamplingLongPrimitiveIterator extends AbstractLongPrimitiveIterator {
+
+ private final PascalDistribution geometricDistribution;
+ private final LongPrimitiveIterator delegate;
+ private long next;
+ private boolean hasNext;
+
+ public SamplingLongPrimitiveIterator(LongPrimitiveIterator delegate, double samplingRate) {
+ this(RandomUtils.getRandom(), delegate, samplingRate);
+ }
+
+ public SamplingLongPrimitiveIterator(RandomWrapper random, LongPrimitiveIterator delegate, double samplingRate) {
+ Preconditions.checkNotNull(delegate);
+ Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0, "Must be: 0.0 < samplingRate <= 1.0");
+ // Geometric distribution is special case of negative binomial (aka Pascal) with r=1:
+ geometricDistribution = new PascalDistribution(random.getRandomGenerator(), 1, samplingRate);
+ this.delegate = delegate;
+ this.hasNext = true;
+ doNext();
+ }
+
+ @Override
+ public boolean hasNext() {
+ return hasNext;
+ }
+
+ @Override
+ public long nextLong() {
+ if (hasNext) {
+ long result = next;
+ doNext();
+ return result;
+ }
+ throw new NoSuchElementException();
+ }
+
+ @Override
+ public long peek() {
+ if (hasNext) {
+ return next;
+ }
+ throw new NoSuchElementException();
+ }
+
+ private void doNext() {
+ int toSkip = geometricDistribution.sample();
+ delegate.skip(toSkip);
+ if (delegate.hasNext()) {
+ next = delegate.next();
+ } else {
+ hasNext = false;
+ }
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void skip(int n) {
+ int toSkip = 0;
+ for (int i = 0; i < n; i++) {
+ toSkip += geometricDistribution.sample();
+ }
+ delegate.skip(toSkip);
+ if (delegate.hasNext()) {
+ next = delegate.next();
+ } else {
+ hasNext = false;
+ }
+ }
+
+ public static LongPrimitiveIterator maybeWrapIterator(LongPrimitiveIterator delegate, double samplingRate) {
+ return samplingRate >= 1.0 ? delegate : new SamplingLongPrimitiveIterator(delegate, samplingRate);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
new file mode 100644
index 0000000..e88f98a
--- /dev/null
+++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * Adds ability to skip ahead in an iterator, perhaps more efficiently than by calling {@link #next()}
+ * repeatedly.
+ */
+public interface SkippingIterator<V> extends Iterator<V> {
+
+ /**
+ * Skip the next n elements supplied by this {@link Iterator}. If there are less than n elements remaining,
+ * this skips all remaining elements in the {@link Iterator}. This method has the same effect as calling
+ * {@link #next()} n times, except that it will never throw {@link java.util.NoSuchElementException}.
+ */
+ void skip(int n);
+
+}