You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by td...@apache.org on 2012/05/16 13:29:10 UTC

svn commit: r1339121 [3/4] - in /mahout/trunk/math: ./ src/main/java-templates/org/apache/mahout/math/function/ src/main/java-templates/org/apache/mahout/math/list/ src/main/java-templates/org/apache/mahout/math/map/ src/main/java-templates/org/apache/...

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java?rev=1339121&r1=1339120&r2=1339121&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/function/Functions.java Wed May 16 11:29:08 2012
@@ -39,7 +39,7 @@ import java.util.Date;
  * {@link org.apache.mahout.math.function.DoubleFunction}, binary functions of type {@link
  * org.apache.mahout.math.function.DoubleDoubleFunction}. All can be retrieved via <tt>public static final</tt>
  * variables named after the function. Unary predicates are of type
- * {@link org.apache.mahout.math.function.DoubleProcedure},
+ * {@link DoubleProcedure},
  * binary predicates of type {@link org.apache.mahout.math.function.DoubleDoubleProcedure}. All can be retrieved via
  * <tt>public static final</tt> variables named <tt>isXXX</tt>.
  *
@@ -647,6 +647,8 @@ public final class Functions {
   /**
    * Constructs a function that returns <tt>from<=a && a<=to</tt>. <tt>a</tt> is a variable, <tt>from</tt> and
    * <tt>to</tt> are fixed.
+   *
+   * Note that DoubleProcedure is generated code and thus looks like an invalid reference unless you can see the generated stuff.
    */
   public static DoubleProcedure isBetween(final double from, final double to) {
     return new DoubleProcedure() {

Copied: mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectObjectProcedure.java (from r1339109, mahout/trunk/math/src/main/java/org/apache/mahout/math/function/DoubleFunction.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectObjectProcedure.java?p2=mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectObjectProcedure.java&p1=mahout/trunk/math/src/main/java/org/apache/mahout/math/function/DoubleFunction.java&r1=1339109&r2=1339121&rev=1339121&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/function/DoubleFunction.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectObjectProcedure.java Wed May 16 11:29:08 2012
@@ -16,29 +16,25 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
 package org.apache.mahout.math.function;
 
-/*
-Copyright 1999 CERN - European Organization for Nuclear Research.
-Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
-is hereby granted without fee, provided that the above copyright notice appear in all copies and 
-that both that copyright notice and this permission notice appear in supporting documentation. 
-CERN makes no representations about the suitability of this software for any purpose. 
-It is provided "as is" without expressed or implied warranty.
-*/
-
 /**
- * Interface that represents a function object: a function that takes a single argument and returns a single value.
+ * Interface that represents a procedure object: 
+ * a procedure that takes two arguments and returns a 'continue' flag.
  */
-public interface DoubleFunction {
+public interface ObjectObjectProcedure<K,V> {
 
   /**
-   * Apply the function to the argument and return the result
+   * Applies a procedure to an argument. Optionally can return a boolean flag to inform the object calling the
+   * procedure.
    *
-   * @param arg1 double for the argument
-   * @return the result of applying the function
+   * <p>Example: forEach() methods often use procedure objects. To signal to a forEach() method whether iteration should
+   * continue normally or terminate (because for example a matching element has been found), a procedure can return
+   * <tt>false</tt> to indicate termination and <tt>true</tt> to indicate continuation.
+   *
+   * @param key key value passed to the procedure
+   * @param value value value passed to the procedure.
+   * @return a flag  to inform the object calling the procedure.
    */
-  double apply(double arg1);
-
+  boolean apply(K key, V value);
 }

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectProcedure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectProcedure.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectProcedure.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/function/ObjectProcedure.java Wed May 16 11:29:08 2012
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+
+
+package org.apache.mahout.math.function;
+
+/**
+ * Interface that represents a procedure object: a procedure that takes a single argument and does not return a value.
+ */
+public interface ObjectProcedure<T> {
+
+  /**
+   * Applies a procedure to an argument. Optionally can return a boolean flag to inform the object calling the
+   * procedure.
+   *
+   * <p>Example: forEach() methods often use procedure objects. To signal to a forEach() method whether iteration should
+   * continue normally or terminate (because for example a matching element has been found), a procedure can return
+   * <tt>false</tt> to indicate termination and <tt>true</tt> to indicate continuation.
+   *
+   * @param element element passed to the procedure.
+   * @return a flag  to inform the object calling the procedure.
+   */
+  boolean apply(T element);
+}

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/function/package.html
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/function/package.html?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/function/package.html (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/function/package.html Wed May 16 11:29:08 2012
@@ -0,0 +1,5 @@
+<HTML>
+<BODY>
+Core interfaces for functions, comparisons and procedures on objects and primitive data types.
+</BODY>
+</HTML>

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractList.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractList.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractList.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractList.java Wed May 16 11:29:08 2012
@@ -0,0 +1,245 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ /*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+import org.apache.mahout.math.PersistentObject;
+
+/**
+ Abstract base class for resizable lists holding objects or primitive data types such as <code>int</code>, <code>float</code>, etc.
+ First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ <p>
+ <b>Note that this implementation is not synchronized.</b>
+
+ @author wolfgang.hoschek@cern.ch
+ @version 1.0, 09/24/99
+ @see     java.util.ArrayList
+ @see      java.util.Vector
+ @see      java.util.Arrays
+ */
+public abstract class AbstractList extends PersistentObject {
+  
+  public abstract int size();
+  
+  public boolean isEmpty() {
+    return size() == 0;
+  }
+
+  /**
+   * Inserts <tt>length</tt> dummy elements before the specified position into the receiver. Shifts the element
+   * currently at that position (if any) and any subsequent elements to the right. <b>This method must set the new size
+   * to be <tt>size()+length</tt>.
+   *
+   * @param index  index before which to insert dummy elements (must be in [0,size])..
+   * @param length number of dummy elements to be inserted.
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt; size()</tt>.
+   */
+  protected abstract void beforeInsertDummies(int index, int length);
+
+  /** Checks if the given index is in range. */
+  protected static void checkRange(int index, int theSize) {
+    if (index >= theSize || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + theSize);
+    }
+  }
+
+  /**
+   * Checks if the given range is within the contained array's bounds.
+   *
+   * @throws IndexOutOfBoundsException if <tt>to!=from-1 || from&lt;0 || from&gt;to || to&gt;=size()</tt>.
+   */
+  protected static void checkRangeFromTo(int from, int to, int theSize) {
+    if (to == from - 1) {
+      return;
+    }
+    if (from < 0 || from > to || to >= theSize) {
+      throw new IndexOutOfBoundsException("from: " + from + ", to: " + to + ", size=" + theSize);
+    }
+  }
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after this call returns, but keep its current
+   * capacity.
+   */
+  public void clear() {
+    removeFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the receiver into ascending order. This sort is guaranteed to be <i>stable</i>:  equal elements will not be
+   * reordered as a result of the sort.<p>
+   *
+   * The sorting algorithm is a modified mergesort (in which the merge is omitted if the highest element in the low
+   * sublist is less than the lowest element in the high sublist).  This algorithm offers guaranteed n*log(n)
+   * performance, and can approach linear performance on nearly sorted lists.
+   *
+   * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   */
+  public final void mergeSort() {
+    mergeSortFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the receiver into ascending order. This sort is guaranteed to be <i>stable</i>:  equal elements will not be
+   * reordered as a result of the sort.<p>
+   *
+   * The sorting algorithm is a modified mergesort (in which the merge is omitted if the highest element in the low
+   * sublist is less than the lowest element in the high sublist).  This algorithm offers guaranteed n*log(n)
+   * performance, and can approach linear performance on nearly sorted lists.
+   *
+   * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   *
+   * @param from the index of the first element (inclusive) to be sorted.
+   * @param to   the index of the last element (inclusive) to be sorted.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || to&gt;=size()) && to!=from-1</tt>.
+   */
+  public abstract void mergeSortFromTo(int from, int to);
+
+  /**
+   * Sorts the receiver into ascending order.  The sorting algorithm is a tuned quicksort, adapted from Jon L. Bentley
+   * and M. Douglas McIlroy's "Engineering a Sort Function", Software-Practice and Experience, Vol. 23(11) P. 1249-1265
+   * (November 1993).  This algorithm offers n*log(n) performance on many data sets that cause other quicksorts to
+   * degrade to quadratic performance.
+   *
+   * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   */
+  public final void quickSort() {
+    quickSortFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the specified range of the receiver into ascending order.  The sorting algorithm is a tuned quicksort,
+   * adapted from Jon L. Bentley and M. Douglas McIlroy's "Engineering a Sort Function", Software-Practice and
+   * Experience, Vol. 23(11) P. 1249-1265 (November 1993).  This algorithm offers n*log(n) performance on many data sets
+   * that cause other quicksorts to degrade to quadratic performance.
+   *
+   * <p><b>You should never call this method unless you are sure that this particular sorting algorithm is the right one
+   * for your data set.</b> It is generally better to call <tt>sort()</tt> or <tt>sortFromTo(...)</tt> instead, because
+   * those methods automatically choose the best sorting algorithm.
+   *
+   * @param from the index of the first element (inclusive) to be sorted.
+   * @param to   the index of the last element (inclusive) to be sorted.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || to&gt;=size()) && to!=from-1</tt>.
+   */
+  public abstract void quickSortFromTo(int from, int to);
+
+  /**
+   * Removes the element at the specified position from the receiver. Shifts any subsequent elements to the left.
+   *
+   * @param index the index of the element to removed.
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt;= size()</tt>.
+   */
+  public void remove(int index) {
+    removeFromTo(index, index);
+  }
+
+  /**
+   * Removes from the receiver all elements whose index is between <code>from</code>, inclusive and <code>to</code>,
+   * inclusive.  Shifts any succeeding elements to the left (reduces their index). This call shortens the list by
+   * <tt>(to - from + 1)</tt> elements.
+   *
+   * @param fromIndex index of first element to be removed.
+   * @param toIndex   index of last element to be removed.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || to&gt;=size()) && to!=from-1</tt>.
+   */
+  public abstract void removeFromTo(int fromIndex, int toIndex);
+
+  /** Reverses the elements of the receiver. Last becomes first, second last becomes second first, and so on. */
+  public abstract void reverse();
+
+  /**
+   * Sets the size of the receiver. If the new size is greater than the current size, new null or zero items are added
+   * to the end of the receiver. If the new size is less than the current size, all components at index newSize and
+   * greater are discarded. This method does not release any superfluos internal memory. Use method <tt>trimToSize</tt>
+   * to release superfluos internal memory.
+   *
+   * @param newSize the new size of the receiver.
+   * @throws IndexOutOfBoundsException if <tt>newSize &lt; 0</tt>.
+   */
+  public void setSize(int newSize) {
+    if (newSize < 0) {
+      throw new IndexOutOfBoundsException("newSize:" + newSize);
+    }
+
+    int currentSize = size();
+    if (newSize != currentSize) {
+      if (newSize > currentSize) {
+        beforeInsertDummies(currentSize, newSize - currentSize);
+      } else if (newSize < currentSize) {
+        removeFromTo(newSize, currentSize - 1);
+      }
+    }
+  }
+
+  /**
+   * Sorts the receiver into ascending order.
+   *
+   * The sorting algorithm is dynamically chosen according to the characteristics of the data set.
+   *
+   * This implementation simply calls <tt>sortFromTo(...)</tt>. Override <tt>sortFromTo(...)</tt> if you can determine
+   * which sort is most appropriate for the given data set.
+   */
+  public final void sort() {
+    sortFromTo(0, size() - 1);
+  }
+
+  /**
+   * Sorts the specified range of the receiver into ascending order.
+   *
+   * The sorting algorithm is dynamically chosen according to the characteristics of the data set. This default
+   * implementation simply calls quickSort. Override this method if you can determine which sort is most appropriate for
+   * the given data set.
+   *
+   * @param from the index of the first element (inclusive) to be sorted.
+   * @param to   the index of the last element (inclusive) to be sorted.
+   * @throws IndexOutOfBoundsException if <tt>(from&lt;0 || from&gt;to || to&gt;=size()) && to!=from-1</tt>.
+   */
+  public void sortFromTo(int from, int to) {
+    quickSortFromTo(from, to);
+  }
+
+  /**
+   * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluos internal memory. An
+   * application can use this operation to minimize the storage of the receiver. <p> This default implementation does
+   * nothing. Override this method in space efficient implementations.
+   */
+  public void trimToSize() {
+  }
+}

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractObjectList.java Wed May 16 11:29:08 2012
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+import java.util.Collection;
+
+/**
+ Abstract base class for resizable lists holding objects or primitive data types such as <code>int</code>, <code>float</code>, etc.
+ First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ <p>
+ <b>Note that this implementation is not synchronized.</b>
+
+ @author wolfgang.hoschek@cern.ch
+ @version 1.0, 09/24/99
+ @see      java.util.ArrayList
+ @see      java.util.Vector
+ @see      java.util.Arrays
+ */
+public abstract class AbstractObjectList<T> extends AbstractList {
+
+  /**
+   * Appends all of the elements of the specified Collection to the receiver.
+   *
+   * @throws ClassCastException if an element in the collection is not of the same parameter type of the receiver.
+   */
+  public void addAllOf(Collection<T> collection) {
+    this.beforeInsertAllOf(size(), collection);
+  }
+
+  /**
+   * Inserts all elements of the specified collection before the specified position into the receiver. Shifts the
+   * element currently at that position (if any) and any subsequent elements to the right (increases their indices).
+   *
+   * @param index      index before which to insert first element from the specified collection.
+   * @param collection the collection to be inserted
+   * @throws ClassCastException        if an element in the collection is not of the same parameter type of the
+   *                                   receiver.
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt; size()</tt>.
+   */
+  public void beforeInsertAllOf(int index, Collection<T> collection) {
+    this.beforeInsertDummies(index, collection.size());
+    this.replaceFromWith(index, collection);
+  }
+
+  /**
+   * Replaces the part of the receiver starting at <code>from</code> (inclusive) with all the elements of the specified
+   * collection. Does not alter the size of the receiver. Replaces exactly <tt>Math.max(0,Math.min(size()-from,
+   * other.size()))</tt> elements.
+   *
+   * @param from  the index at which to copy the first element from the specified collection.
+   * @param other Collection to replace part of the receiver
+   * @throws IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt;= size()</tt>.
+   */
+  public abstract void replaceFromWith(int from, Collection<T> other);
+}

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java Wed May 16 11:29:08 2012
@@ -0,0 +1,417 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+import org.apache.mahout.math.function.ObjectProcedure;
+
+import java.util.Collection;
+
+/**
+ Resizable list holding <code>${valueType}</code> elements; implemented with arrays.
+*/
+
+public class ObjectArrayList<T> extends AbstractObjectList<T> {
+
+  /**
+   * The array buffer into which the elements of the list are stored. The capacity of the list is the length of this
+   * array buffer.
+   */
+  private Object[] elements;
+  private int size;
+
+  /** Constructs an empty list. */
+  public ObjectArrayList() {
+    this(10);
+  }
+
+  /**
+   * Constructs a list containing the specified elements. The initial size and capacity of the list is the length of the
+   * array.
+   *
+   * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>. So if
+   * subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+   *
+   * @param elements the array to be backed by the the constructed list
+   */
+  public ObjectArrayList(T[] elements) {
+    elements(elements);
+  }
+
+  /**
+   * Constructs an empty list with the specified initial capacity.
+   *
+   * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new
+   *                        internal memory.
+   */
+  @SuppressWarnings("unchecked")
+  public ObjectArrayList(int initialCapacity) {
+    elements((T[])new Object[initialCapacity]);
+  }
+
+  /**
+   * Appends the specified element to the end of this list.
+   *
+   * @param element element to be appended to this list.
+   */
+  public void add(T element) {
+    // overridden for performance only.
+    if (size == elements.length) {
+      ensureCapacity(size + 1);
+    }
+    elements[size++] = element;
+  }
+
+  /**
+   * Inserts the specified element before the specified position into the receiver. Shifts the element currently at that
+   * position (if any) and any subsequent elements to the right.
+   *
+   * @param index   index before which the specified element is to be inserted (must be in [0,size]).
+   * @param element element to be inserted.
+   * @throws IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+   */
+  public void beforeInsert(int index, T element) {
+    // overridden for performance only.
+    if (size == index) {
+      add(element);
+      return;
+    }
+    if (index > size || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size);
+    }
+    ensureCapacity(size + 1);
+    System.arraycopy(elements, index, elements, index + 1, size - index);
+    elements[index] = element;
+    size++;
+  }
+
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object clone() {
+    // overridden for performance only.
+    return new ObjectArrayList<T>((T[]) elements.clone());
+  }
+
+  /**
+   * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @SuppressWarnings("unchecked")
+  public ObjectArrayList<T> copy() {
+    return (ObjectArrayList<T>) clone();
+  }
+
+  /**
+   * Returns the elements currently stored, including invalid elements between size and capacity, if any.
+   *
+   * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>. So if
+   * subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+   *
+   * @return the elements currently stored.
+   */
+  @SuppressWarnings("unchecked")
+  public<Q> Q[] elements() {
+    return (Q[])elements;
+  }
+
+  /**
+   * Sets the receiver's elements to be the specified array (not a copy of it).
+   *
+   * The size and capacity of the list is the length of the array. <b>WARNING:</b> For efficiency reasons and to keep
+   * memory usage low, <b>the array is not copied</b>. So if subsequently you modify the specified array directly via
+   * the [] operator, be sure you know what you're doing.
+   *
+   * @param elements the new elements to be stored.
+   */
+  public void elements(T[] elements) {
+    this.elements = elements;
+    this.size = elements.length;
+  }
+
+  /**
+   * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new
+   * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver.
+   *
+   * @param minCapacity the desired minimum capacity.
+   */
+  public void ensureCapacity(int minCapacity) {
+    elements = org.apache.mahout.math.Arrays.ensureCapacity(elements, minCapacity);
+  }
+
+  /**
+   * Compares the specified Object with the receiver. Returns true if and only if the specified Object is also an
+   * ArrayList of the same type, both Lists have the same size, and all corresponding pairs of elements in the two Lists
+   * are identical. In other words, two Lists are defined to be equal if they contain the same elements in the same
+   * order.
+   *
+   * @param otherObj the Object to be compared for equality with the receiver.
+   * @return true if the specified Object is equal to the receiver.
+   */
+  @SuppressWarnings("unchecked")
+  public boolean equals(Object otherObj) { //delta
+    // overridden for performance only.
+    if (!(otherObj instanceof ObjectArrayList)) {
+      return super.equals(otherObj);
+    }
+    if (this == otherObj) {
+      return true;
+    }
+    if (otherObj == null) {
+      return false;
+    }
+    ObjectArrayList<?> other = (ObjectArrayList<?>) otherObj;
+    if (size() != other.size()) {
+      return false;
+    }
+
+    Object[] theElements = elements();
+    Object[] otherElements = other.elements();
+    for (int i = size(); --i >= 0;) {
+      if (theElements[i] != otherElements[i]) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Applies a procedure to each element of the receiver, if any. Starts at index 0, moving rightwards.
+   *
+   * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+   *                  continues.
+   * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise.
+   */
+  @SuppressWarnings("unchecked")
+  public boolean forEach(ObjectProcedure<T> procedure) {
+    T[] theElements = (T[]) elements;
+    int theSize = size;
+
+    for (int i = 0; i < theSize;) {
+      if (!procedure.apply(theElements[i++])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the element at the specified position in the receiver.
+   *
+   * @param index index of element to return.
+   * @throws IndexOutOfBoundsException index is out of range (index &lt; 0 || index &gt;= size()).
+   */
+  @SuppressWarnings("unchecked")
+  public T get(int index) {
+    // overridden for performance only.
+    if (index >= size || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size);
+    }
+    return (T) elements[index];
+  }
+
+  /**
+   * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions.
+   * Provided with invalid parameters this method may return invalid elements without throwing any exception! <b>You
+   * should only use this method when you are absolutely sure that the index is within bounds.</b> Precondition
+   * (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index index of element to return.
+   */
+  @SuppressWarnings("unchecked")
+  public T getQuick(int index) {
+    return (T) elements[index];
+  }
+
+  /**
+   * Returns the index of the first occurrence of the specified element. Returns <code>-1</code> if the receiver does
+   * not contain this element. Searches between <code>from</code>, inclusive and <code>to</code>, inclusive. Tests for
+   * identity.
+   *
+   * @param element element to search for.
+   * @param from    the leftmost search position, inclusive.
+   * @param to      the rightmost search position, inclusive.
+   * @return the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is
+   *         not found.
+   * @throws IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to ||
+   *                                   to&gt;=size())</tt>).
+   */
+  public int indexOfFromTo(T element, int from, int to) {
+    // overridden for performance only.
+    if (size == 0) {
+      return -1;
+    }
+    checkRangeFromTo(from, to, size);
+
+    Object[] theElements = elements;
+    for (int i = from; i <= to; i++) {
+      if (element == theElements[i]) {
+        return i;
+      } //found
+    }
+    return -1; //not found
+  }
+
+  /**
+   * Returns the index of the last occurrence of the specified element. Returns <code>-1</code> if the receiver does not
+   * contain this element. Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive. Tests
+   * for identity.
+   *
+   * @param element element to search for.
+   * @param from    the leftmost search position, inclusive.
+   * @param to      the rightmost search position, inclusive.
+   * @return the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is
+   *         not found.
+   * @throws IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to ||
+   *                                   to&gt;=size())</tt>).
+   */
+  public int lastIndexOfFromTo(T element, int from, int to) {
+    // overridden for performance only.
+    if (size == 0) {
+      return -1;
+    }
+    checkRangeFromTo(from, to, size);
+
+    Object[] theElements = elements;
+    for (int i = to; i >= from; i--) {
+      if (element == theElements[i]) {
+        return i;
+      } //found
+    }
+    return -1; //not found
+  }
+
+  /**
+   * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>,
+   * inclusive.
+   *
+   * @param from the index of the first element (inclusive).
+   * @param to   the index of the last element (inclusive).
+   * @return a new list
+   * @throws IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to ||
+   *                                   to&gt;=size())</tt>).
+   */
+  @SuppressWarnings("unchecked")
+  public AbstractObjectList<T> partFromTo(int from, int to) {
+    if (size == 0) {
+      return new ObjectArrayList<T>(0);
+    }
+
+    checkRangeFromTo(from, to, size);
+
+    Object[] part = new Object[to - from + 1];
+    System.arraycopy(elements, from, part, 0, to - from + 1);
+    return new ObjectArrayList<T>((T[]) part);
+  }
+
+  /** Reverses the elements of the receiver. Last becomes first, second last becomes second first, and so on. */
+  @Override
+  public void reverse() {
+    // overridden for performance only.
+    int limit = size / 2;
+    int j = size - 1;
+
+    Object[] theElements = elements;
+    for (int i = 0; i < limit;) { //swap
+      Object tmp = theElements[i];
+      theElements[i++] = theElements[j];
+      theElements[j--] = tmp;
+    }
+  }
+
+  /**
+   * Replaces the element at the specified position in the receiver with the specified element.
+   *
+   * @param index   index of element to replace.
+   * @param element element to be stored at the specified position.
+   * @throws IndexOutOfBoundsException index is out of range (index &lt; 0 || index &gt;= size()).
+   */
+  public void set(int index, T element) {
+    // overridden for performance only.
+    if (index >= size || index < 0) {
+      throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + size);
+    }
+    elements[index] = element;
+  }
+
+  /**
+   * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not
+   * check preconditions. Provided with invalid parameters this method may access invalid indexes without throwing any
+   * exception! <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+   * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index   index of element to replace.
+   * @param element element to be stored at the specified position.
+   */
+  public void setQuick(int index, T element) {
+    elements[index] = element;
+  }
+
+  /**
+   * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An
+   * application can use this operation to minimize the storage of the receiver.
+   */
+  @Override
+  public void trimToSize() {
+    elements = org.apache.mahout.math.Arrays.trimToCapacity(elements, size());
+  }
+
+  @Override
+  public void removeFromTo(int fromIndex, int toIndex) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void replaceFromWith(int from, Collection<T> other) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  protected void beforeInsertDummies(int index, int length) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void mergeSortFromTo(int from, int to) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void quickSortFromTo(int from, int to) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public int size() {
+    return size;
+  }
+}

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java Wed May 16 11:29:08 2012
@@ -0,0 +1,102 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.list;
+
+/**
+ Resizable list holding <code>long</code> elements; implemented with arrays; not efficient; just to demonstrate which methods you must override to implement a fully functional list.
+ */
+public class SimpleLongArrayList extends AbstractLongList {
+
+  /**
+   * The array buffer into which the elements of the list are stored. The capacity of the list is the length of this
+   * array buffer.
+   */
+  private long[] elements;
+
+  /** Constructs an empty list. */
+  public SimpleLongArrayList() {
+    this(10);
+  }
+
+  /**
+   * Constructs a list containing the specified elements. The initial size and capacity of the list is the length of the
+   * array.
+   *
+   * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>. So if
+   * subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+   *
+   * @param elements the array to be backed by the the constructed list
+   */
+  public SimpleLongArrayList(long[] elements) {
+    elements(elements);
+  }
+
+  /**
+   * Constructs an empty list with the specified initial capacity.
+   *
+   * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new
+   *                        internal memory.
+   */
+  private SimpleLongArrayList(int initialCapacity) {
+    super();
+    if (initialCapacity < 0) {
+      throw new IllegalArgumentException("Illegal Capacity: " + initialCapacity);
+    }
+
+    this.elements(new long[initialCapacity]);
+    size = 0;
+  }
+
+  /**
+   * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new
+   * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver.
+   *
+   * @param minCapacity the desired minimum capacity.
+   */
+  @Override
+  public void ensureCapacity(int minCapacity) {
+    elements = org.apache.mahout.math.Arrays.ensureCapacity(elements, minCapacity);
+  }
+
+  /**
+   * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions.
+   * Provided with invalid parameters this method may return invalid elements without throwing any exception! <b>You
+   * should only use this method when you are absolutely sure that the index is within bounds.</b> Precondition
+   * (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index index of element to return.
+   */
+  @Override
+  protected long getQuick(int index) {
+    return elements[index];
+  }
+
+  /**
+   * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not
+   * check preconditions. Provided with invalid parameters this method may access invalid indexes without throwing any
+   * exception! <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+   * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+   *
+   * @param index   index of element to replace.
+   * @param element element to be stored at the specified position.
+   */
+  @Override
+  protected void setQuick(int index, long element) {
+    elements[index] = element;
+  }
+
+  /**
+   * Trims the capacity of the receiver to be the receiver's current size. An application can use this operation to
+   * minimize the storage of the receiver.
+   */
+  @Override
+  public void trimToSize() {
+    elements = org.apache.mahout.math.Arrays.trimToCapacity(elements, size());
+  }
+}

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package.html
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package.html?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package.html (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/list/package.html Wed May 16 11:29:08 2012
@@ -0,0 +1,162 @@
+<HTML>
+<BODY>
+Resizable lists holding objects or primitive data types such as <tt>int</tt>,
+<tt>double</tt>, etc. For non-resizable lists (1-dimensional matrices) see
+package <code>org.apache.mahout.math.matrix</code>.<p></p>
+
+<h1><a name="Overview"></a>Getting Started</h1>
+
+<h2>1. Overview</h2>
+
+<p>The list package offers flexible object oriented abstractions modelling dynamically
+  resizing lists holding objects or primitive data types such as <tt>int</tt>,
+  <tt>double</tt>, etc. It is designed to be scalable in terms of performance
+  and memory requirements.</p>
+
+<p>Features include: </p>
+
+<p></p>
+<ul>
+  <li>Lists operating on objects as well as all primitive data types such as <tt>int</tt>,
+    <tt>double</tt>, etc.
+  </li>
+  <li>Compact representations</li>
+  <li>A number of general purpose list operations including: adding, inserting,
+    removing, iterating, searching, sorting, extracting ranges and copying. All
+    operations are designed to perform well on mass data.
+  </li>
+  <li>Support for quick access to list elements. This is achieved by bounds-checking
+    and non-bounds-checking accessor methods as well as zero-copy transformations
+    to primitive arrays such as <tt>int[]</tt>, <tt>double[]</tt>, etc.
+  </li>
+  <li>Allows to use high level algorithms on primitive data types without any
+    space and time overhead. Operations on primitive arrays, Colt lists and JAL
+    algorithms can freely be mixed at zero copy overhead.
+  </li>
+</ul>
+<p>File-based I/O can be achieved through the standard Java built-in serialization
+  mechanism. All classes implement the {@link java.io.Serializable} interface.
+  However, the toolkit is entirely decoupled from advanced I/O. It provides data
+  structures and algorithms only.
+
+<p> This toolkit borrows concepts and terminology from the Javasoft <a
+    href="http://www.javasoft.com/products/jdk/1.2/docs/guide/collections/index.html">
+  Collections framework</a> written by Josh Bloch and introduced in JDK 1.2.
+
+<h2>2. Introduction</h2>
+
+<p>Lists are fundamental to virtually any application. Large scale resizable lists
+  are, for example, used in scientific computations, simulations database management
+  systems, to name just a few.</p>
+
+<h2></h2>
+
+<p>A list is a container holding elements that can be accessed via zero-based
+  indexes. Lists may be implemented in different ways (most commonly with arrays).
+  A resizable list automatically grows as elements are added. The lists of this
+  package do not automatically shrink. Shrinking needs to be triggered by explicitly
+  calling <tt>trimToSize()</tt> methods.</p>
+
+<p><i>Growing policy</i>: A list implemented with arrays initially has a certain
+  <tt>initialCapacity</tt> - per default 10 elements, but customizable upon instance
+  construction. As elements are added, this capacity may nomore be sufficient.
+  When a list is automatically grown, its capacity is expanded to <tt>1.5*currentCapacity</tt>.
+  Thus, excessive resizing (involving copying) is avoided.</p>
+<h4>Copying</h4>
+
+<p>
+
+<p>Any list can be copied. A copy is <i>equal</i> to the original but entirely
+  independent of the original. So changes in the copy are not reflected in the
+  original, and vice-versa.
+
+<h2>3. Organization of this package</h2>
+
+<p>Class naming follows the schema <tt>&lt;ElementType&gt;&lt;ImplementationTechnique&gt;List</tt>.
+  For example, we have a {@link org.apache.mahout.math.list.DoubleArrayList}, which is a list
+  holding <tt>double</tt> elements implemented with <tt>double</tt>[] arrays.
+</p>
+
+<p>The classes for lists of a given value type are derived from a common abstract
+  base class tagged <tt>Abstract&lt;ElementType&gt;</tt><tt>List</tt>. For example,
+  all lists operating on <tt>double</tt> elements are derived from
+  {@link org.apache.mahout.math.list.AbstractDoubleList},
+  which in turn is derived from an abstract base class tying together all lists
+  regardless of value type, {@link org.apache.mahout.math.list.AbstractList}. The abstract
+  base classes provide skeleton implementations for all but few methods. Experimental
+  data layouts (such as compressed, sparse, linked, etc.) can easily be implemented
+  and inherit a rich set of functionality. Have a look at the javadoc <a href="package-tree.html">tree
+    view</a> to get the broad picture.</p>
+
+<h2>4. Example usage</h2>
+
+<p>The following snippet fills a list, randomizes it, extracts the first half
+  of the elements, sums them up and prints the result. It is implemented entirely
+  with accessor methods.</p>
+<table>
+  <td class="PRE">
+      <pre>
+int s = 1000000;<br>AbstractDoubleList list = new DoubleArrayList();
+for (int i=0; i&lt;s; i++) { list.add((double)i); }
+list.shuffle();
+AbstractDoubleList part = list.partFromTo(0,list.size()/2 - 1);
+double sum = 0.0;
+for (int i=0; i&lt;part.size(); i++) { sum += part.get(i); }
+log.info(sum);
+</pre>
+  </td>
+</table>
+<p> For efficiency, all classes provide back doors to enable getting/setting the
+  backing array directly. In this way, the high level operations of these classes
+  can be used where appropriate, and one can switch to <tt>[]</tt>-array index
+  notations where necessary. The key methods for this are <tt>public &lt;ElementType&gt;[]
+    elements()</tt> and <tt>public void elements(&lt;ElementType&gt;[])</tt>. The
+  former trustingly returns the array it internally keeps to store the elements.
+  Holding this array in hand, we can use the <tt>[]</tt>-array operator to
+  perform iteration over large lists without needing to copy the array or paying
+  the performance penalty introduced by accessor methods. Alternatively any JAL
+  algorithm (or other algorithm) can operate on the returned primitive array.
+  The latter method forces a list to internally hold a user provided array. Using
+  this approach one can avoid needing to copy the elements into the list.
+
+<p>As a consequence, operations on primitive arrays, Colt lists and JAL algorithms
+  can freely be mixed at zero-copy overhead.
+
+<p> Note that such special treatment certainly breaks encapsulation. This functionality
+  is provided for performance reasons only and should only be used when absolutely
+  necessary. Here is the above example in mixed notation:
+<table>
+  <td class="PRE">
+      <pre>
+int s = 1000000;<br>DoubleArrayList list = new DoubleArrayList(s); // list.size()==0, capacity==s
+list.setSize(s); // list.size()==s<br>double[] values = list.elements(); // zero copy, values.length==s<br>for (int i=0; i&lt;s; i++) { values[i]=(double)i; }
+list.shuffle();
+double sum = 0.0;
+int limit = values.length/2;
+for (int i=0; i&lt;limit; i++) { sum += values[i]; }
+log.info(sum);
+</pre>
+  </td>
+</table>
+<p> Or even more compact using lists as algorithm objects:
+<table>
+  <td class="PRE">
+      <pre>
+int s = 1000000;<br>double[] values = new double[s];
+for (int i=0; i&lt;s; i++) { values[i]=(double)i; }
+new DoubleArrayList(values).shuffle(); // zero-copy, shuffle via back door
+double sum = 0.0;
+int limit = values.length/2;
+for (int i=0; i&lt;limit; i++) { sum += values[i]; }
+log.info(sum);
+</pre>
+  </td>
+</table>
+<p>
+
+<h2>5. Notes </h2>
+
+<p>The quicksorts and mergesorts are the JDK 1.2 V1.26 algorithms, modified as
+  necessary to operate on the given data types.
+</BODY>
+</HTML>

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/HashFunctions.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/HashFunctions.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/map/HashFunctions.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/HashFunctions.java Wed May 16 11:29:08 2012
@@ -0,0 +1,118 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.map;
+
+/**
+ * Provides various hash functions.
+ *
+ */
+public final class HashFunctions {
+
+  /**
+   * Utility class pattern: all static members, no inheritance.
+   */
+  private HashFunctions() {
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(char value) {
+    return (int) value;
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(double value) {
+    long bits = Double.doubleToLongBits(value);
+    return (int) (bits ^ (bits >>> 32));
+
+    //return (int) Double.doubleToLongBits(value*663608941.737);
+    // this avoids excessive hashCollisions in the case values are of the form (1.0, 2.0, 3.0, ...)
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(float value) {
+    return Float.floatToIntBits(value * 663608941.737f);
+    // this avoids excessive hashCollisions in the case values are of the form (1.0, 2.0, 3.0, ...)
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(int value) {
+    return value;
+
+    //return value * 0x278DDE6D; // see org.apache.mahout.math.jet.random.engine.DRand
+
+    /*
+    value &= 0x7FFFFFFF; // make it >=0
+    int hashCode = 0;
+    do hashCode = 31*hashCode + value%10;
+    while ((value /= 10) > 0);
+
+    return 28629151*hashCode; // spread even further; h*31^5
+    */
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(long value) {
+    return (int) (value ^ (value >> 32));
+    /*
+    value &= 0x7FFFFFFFFFFFFFFFL; // make it >=0 (0x7FFFFFFFFFFFFFFFL==Long.MAX_VALUE)
+    int hashCode = 0;
+    do hashCode = 31*hashCode + (int) (value%10);
+    while ((value /= 10) > 0);
+
+    return 28629151*hashCode; // spread even further; h*31^5
+    */
+  }
+
+  /**
+   * Returns a hashcode for the specified object.
+   *
+   * @return a hash code value for the specified object.
+   */
+  public static int hash(Object object) {
+    return object == null ? 0 : object.hashCode();
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(short value) {
+    return (int) value;
+  }
+
+  /**
+   * Returns a hashcode for the specified value.
+   *
+   * @return a hash code value for the specified value.
+   */
+  public static int hash(boolean value) {
+    return value ? 1231 : 1237;
+  }
+}

Copied: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/OpenHashMap.java (from r1339109, mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t)
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/OpenHashMap.java?p2=mahout/trunk/math/src/main/java/org/apache/mahout/math/map/OpenHashMap.java&p1=mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t&r1=1339109&r2=1339121&rev=1339121&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java-templates/org/apache/mahout/math/map/OpenObjectValueTypeHashMap.java.t (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/OpenHashMap.java Wed May 16 11:29:08 2012
@@ -18,7 +18,7 @@
  */
 
 /*
-Copyright � 1999 CERN - European Organization for Nuclear Research.
+Copyright � 1999 CERN - European Organization for Nuclear Research.
 Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
 is hereby granted without fee, provided that the above copyright notice appear in all copies and 
 that both that copyright notice and this permission notice appear in supporting documentation. 
@@ -27,17 +27,26 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.map;
 
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
-import org.apache.mahout.math.function.Object${valueTypeCap}Procedure;
+import org.apache.mahout.math.function.ObjectObjectProcedure;
 import org.apache.mahout.math.function.ObjectProcedure;
-import org.apache.mahout.math.list.${valueTypeCap}ArrayList;
+import org.apache.mahout.math.set.AbstractSet;
+import org.apache.mahout.math.set.OpenHashSet;
 
 /**
-  * Open hash map from Object keys to ${valueType} values.
+  * Open hash map. This implements Map, but it does not respect several aspects of the Map contract
+  * that impose the very sorts of performance penalities that this class exists to avoid.
+  * {@link #entrySet}, {@link #values}, and {@link #keySet()} do <strong>not</strong> return
+  * collections that share storage with the main map, and changes to those returned objects
+  * are <strong>not</strong> reflected in the container.
  **/
-public class OpenObject${valueTypeCap}HashMap<T> extends AbstractObject${valueTypeCap}Map<T> {
+public class OpenHashMap<K,V> extends AbstractSet implements Map<K,V> {
   protected static final byte FREE = 0;
   protected static final byte FULL = 1;
   protected static final byte REMOVED = 2;
@@ -47,7 +56,7 @@ public class OpenObject${valueTypeCap}Ha
   protected Object[] table;
 
   /** The hash table values. */
-  protected ${valueType}[] values;
+  protected Object[] values;
 
   /** The state of each hash table entry (FREE, FULL, REMOVED). */
   protected byte[] state;
@@ -57,7 +66,7 @@ public class OpenObject${valueTypeCap}Ha
 
 
   /** Constructs an empty map with default capacity and default load factors. */
-  public OpenObject${valueTypeCap}HashMap() {
+  public OpenHashMap() {
     this(defaultCapacity);
   }
 
@@ -67,7 +76,7 @@ public class OpenObject${valueTypeCap}Ha
    * @param initialCapacity the initial capacity of the map.
    * @throws IllegalArgumentException if the initial capacity is less than zero.
    */
-  public OpenObject${valueTypeCap}HashMap(int initialCapacity) {
+  public OpenHashMap(int initialCapacity) {
     this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
   }
 
@@ -81,14 +90,14 @@ public class OpenObject${valueTypeCap}Ha
    *                                  (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
    *                                  maxLoadFactor)</tt>.
    */
-  public OpenObject${valueTypeCap}HashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
+  public OpenHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
     setUp(initialCapacity, minLoadFactor, maxLoadFactor);
   }
 
   /** Removes all (key,value) associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */
   @Override
   public void clear() {
-    Arrays.fill(this.state, 0, state.length - 1, FREE);
+    Arrays.fill(this.state, FREE);
     distinct = 0;
     freeEntries = table.length; // delta
     trimToSize();
@@ -102,7 +111,7 @@ public class OpenObject${valueTypeCap}Ha
   @Override
   @SuppressWarnings("unchecked")
   public Object clone() {
-    OpenObject${valueTypeCap}HashMap copy = (OpenObject${valueTypeCap}HashMap) super.clone();
+    OpenHashMap<K,V> copy = (OpenHashMap<K,V>) super.clone();
     copy.table = copy.table.clone();
     copy.values = copy.values.clone();
     copy.state = copy.state.clone();
@@ -114,9 +123,10 @@ public class OpenObject${valueTypeCap}Ha
    *
    * @return <tt>true</tt> if the receiver contains the specified key.
    */
+  @SuppressWarnings("unchecked")
   @Override
-  public boolean containsKey(T key) {
-    return indexOfKey(key) >= 0;
+  public boolean containsKey(Object key) {
+    return indexOfKey((K)key) >= 0;
   }
 
   /**
@@ -124,9 +134,10 @@ public class OpenObject${valueTypeCap}Ha
    *
    * @return <tt>true</tt> if the receiver contains the specified value.
    */
+  @SuppressWarnings("unchecked")
   @Override
-  public boolean containsValue(${valueType} value) {
-    return indexOfValue(value) >= 0;
+  public boolean containsValue(Object value) {
+    return indexOfValue((V)value) >= 0;
   }
 
   /**
@@ -157,12 +168,11 @@ public class OpenObject${valueTypeCap}Ha
    *                  continues.
    * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
    */
-  @Override
   @SuppressWarnings("unchecked")
-  public boolean forEachKey(ObjectProcedure<T> procedure) {
+  public boolean forEachKey(ObjectProcedure<K> procedure) {
     for (int i = table.length; i-- > 0;) {
       if (state[i] == FULL) {
-        if (!procedure.apply((T)table[i])) {
+        if (!procedure.apply((K)table[i])) {
           return false;
         }
       }
@@ -178,12 +188,11 @@ public class OpenObject${valueTypeCap}Ha
    *                  continues.
    * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
    */
-  @Override
-  @SuppressWarnings("unchecked")
-  public boolean forEachPair(Object${valueTypeCap}Procedure<T> procedure) {
+    @SuppressWarnings("unchecked")
+  public boolean forEachPair(ObjectObjectProcedure<K,V> procedure) {
     for (int i = table.length; i-- > 0;) {
       if (state[i] == FULL) {
-        if (!procedure.apply((T)table[i], values[i])) {
+        if (!procedure.apply((K)table[i], (V)values[i])) {
           return false;
         }
       }
@@ -193,19 +202,20 @@ public class OpenObject${valueTypeCap}Ha
 
   /**
    * Returns the value associated with the specified key. It is often a good idea to first check with {@link
-   * #containsKey(double)} whether the given key has a value associated or not, i.e. whether there exists an association
+   * #containsKey(Object)} whether the given key has a value associated or not, i.e. whether there exists an association
    * for the given key or not.
    *
    * @param key the key to be searched for.
    * @return the value associated with the specified key; <tt>0</tt> if no such key is present.
    */
+  @SuppressWarnings("unchecked")
   @Override
-  public ${valueType} get(T key) {
-    final int i = indexOfKey(key);
+  public V get(Object key) {
+    int i = indexOfKey((K)key);
     if (i < 0) {
-      return 0;
+      return null;
     } //not contained
-    return values[i];
+    return (V)values[i];
   }
 
   /**
@@ -215,10 +225,12 @@ public class OpenObject${valueTypeCap}Ha
    *         at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at
    *         slot index.
    */
-  protected int indexOfInsertion(T key) {
-    final int length = table.length;
+  protected int indexOfInsertion(K key) {
+    Object[] tab = table;
+    byte[] stat = state;
+    int length = tab.length;
 
-    final int hash = key.hashCode() & 0x7FFFFFFF;
+    int hash = key.hashCode() & 0x7FFFFFFF;
     int i = hash % length;
     int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
     //int decrement = (hash / length) % length;
@@ -228,7 +240,7 @@ public class OpenObject${valueTypeCap}Ha
 
     // stop if we find a removed or free slot, or if we find the key itself
     // do NOT skip over removed slots (yes, open addressing is like that...)
-    while (state[i] == FULL && !equalsMindTheNull(table[i], key)) {
+    while (stat[i] == FULL && !equalsMindTheNull(key, tab[i])) {
       i -= decrement;
       //hashCollisions++;
       if (i < 0) {
@@ -236,25 +248,25 @@ public class OpenObject${valueTypeCap}Ha
       }
     }
 
-    if (state[i] == REMOVED) {
+    if (stat[i] == REMOVED) {
       // stop if we find a free slot, or if we find the key itself.
       // do skip over removed slots (yes, open addressing is like that...)
       // assertion: there is at least one FREE slot.
-      final int j = i;
-      while (state[i] != FREE && (state[i] == REMOVED || !equalsMindTheNull(table[i], key))) {
+      int j = i;
+      while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
         i -= decrement;
         //hashCollisions++;
         if (i < 0) {
           i += length;
         }
       }
-      if (state[i] == FREE) {
+      if (stat[i] == FREE) {
         i = j;
       }
     }
 
 
-    if (state[i] == FULL) {
+    if (stat[i] == FULL) {
       // key already contained at slot i.
       // return a negative number identifying the slot.
       return -i - 1;
@@ -268,10 +280,12 @@ public class OpenObject${valueTypeCap}Ha
    * @param key the key to be searched in the receiver.
    * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
    */
-  protected int indexOfKey(T key) {
-    final int length = table.length;
+  protected int indexOfKey(K key) {
+    Object[] tab = table;
+    byte[] stat = state;
+    int length = tab.length;
 
-    final int hash = key.hashCode() & 0x7FFFFFFF;
+    int hash = key.hashCode() & 0x7FFFFFFF;
     int i = hash % length;
     int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
     //int decrement = (hash / length) % length;
@@ -281,7 +295,7 @@ public class OpenObject${valueTypeCap}Ha
 
     // stop if we find a free slot, or if we find the key itself.
     // do skip over removed slots (yes, open addressing is like that...)
-    while (state[i] != FREE && (state[i] == REMOVED || !equalsMindTheNull(table[i], key))) {
+    while (stat[i] != FREE && (stat[i] == REMOVED || !equalsMindTheNull(key, tab[i]))) {
       i -= decrement;
       //hashCollisions++;
       if (i < 0) {
@@ -289,7 +303,7 @@ public class OpenObject${valueTypeCap}Ha
       }
     }
 
-    if (state[i] == FREE) {
+    if (stat[i] == FREE) {
       return -1;
     } // not found
     return i; //found, return index where key is contained
@@ -299,12 +313,12 @@ public class OpenObject${valueTypeCap}Ha
    * @param value the value to be searched in the receiver.
    * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
    */
-  protected int indexOfValue(${valueType} value) {
-    ${valueType}[] val = values;
+  protected int indexOfValue(V value) {
+    Object[] val = values;
     byte[] stat = state;
 
     for (int i = stat.length; --i >= 0;) {
-      if (stat[i] == FULL && val[i] == value) {
+      if (stat[i] == FULL && equalsMindTheNull(val[i], value)) {
         return i;
       }
     }
@@ -320,46 +334,17 @@ public class OpenObject${valueTypeCap}Ha
    *
    * @param list the list to be filled, can have any size.
    */
-  @Override
   @SuppressWarnings("unchecked")
-  public void keys(List<T> list) {
+  public void keys(List<K> list) {
     list.clear();
+  
 
-    for (int i = table.length; i-- > 0;) {
-      if (state[i] == FULL) {
-        list.add((T)table[i]);
-      }
-    }
-  }
-
-  /**
-   * Fills all pairs satisfying a given condition into the specified lists. Fills into the lists, starting at index 0.
-   * After this call returns the specified lists both have a new size, the number of pairs satisfying the condition.
-   *  <p> <b>Example:</b> <br>
-   * <pre>
-   * Object${valueTypeCap}Procedure<T> condition = new Object${valueTypeCap}Procedure<T>() { // match even values only
-   * public boolean apply(T key, ${valueType} value) { return value%2==0; }
-   * }
-   * keys = (8,7,6), values = (1,2,2) --> keyList = (6,8), valueList = (2,1)</tt>
-   * </pre>
-   *
-   * @param condition the condition to be matched. Takes the current key as first and the current value as second
-   *                  argument.
-   * @param keyList   the list to be filled with keys, can have any size.
-   * @param valueList the list to be filled with values, can have any size.
-   */
-  @Override
-  @SuppressWarnings("unchecked")
-  public void pairsMatching(Object${valueTypeCap}Procedure<T> condition, 
-                            List<T> keyList, 
-                            ${valueTypeCap}ArrayList valueList) {
-    keyList.clear();
-    valueList.clear();
+    Object [] tab = table;
+    byte[] stat = state;
 
-    for (int i = table.length; i-- > 0;) {
-      if (state[i] == FULL && condition.apply((T)table[i], values[i])) {
-        keyList.add((T)table[i]);
-        valueList.add(values[i]);
+    for (int i = tab.length; i-- > 0;) {
+      if (stat[i] == FULL) {
+        list.add((K)tab[i]);
       }
     }
   }
@@ -373,13 +358,15 @@ public class OpenObject${valueTypeCap}Ha
    * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did
    *         already contain such a key - the new value has now replaced the formerly associated value.
    */
+  @SuppressWarnings("unchecked")
   @Override
-  public boolean put(T key, ${valueType} value) {
+  public V put(K key, V value) {
     int i = indexOfInsertion(key);
     if (i < 0) { //already contained
       i = -i - 1;
+      V previous = (V) this.values[i];
       this.values[i] = value;
-      return false;
+      return previous;
     }
 
     if (this.distinct > this.highWaterMark) {
@@ -401,21 +388,8 @@ public class OpenObject${valueTypeCap}Ha
       rehash(newCapacity);
     }
 
-    return true;
+    return null;
   }
-  
-    @Override
-  public ${valueType} adjustOrPutValue(T key, ${valueType} newValue, ${valueType} incrValue) {
-    int i = indexOfInsertion(key);
-    if (i < 0) { //already contained
-      i = -i - 1;
-      this.values[i] += incrValue;
-      return this.values[i];
-    } else {
-        put(key, newValue);
-        return newValue;
-    }
- }
 
   /**
    * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called
@@ -428,25 +402,28 @@ public class OpenObject${valueTypeCap}Ha
     //if (oldCapacity == newCapacity) return;
 
     Object[] oldTable = table;
-    ${valueType}[] oldValues = values;
+    Object[] oldValues = values;
     byte[] oldState = state;
 
-    this.table = new Object[newCapacity];
-    this.values = new ${valueType}[newCapacity];
-    this.state = new byte[newCapacity];
+    Object[] newTable = new Object[newCapacity];
+    Object[] newValues = new Object[newCapacity];
+    byte[] newState = new byte[newCapacity];
 
     this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
     this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);
 
+    this.table = newTable;
+    this.values = newValues;
+    this.state = newState;
     this.freeEntries = newCapacity - this.distinct; // delta
 
     for (int i = oldCapacity; i-- > 0;) {
       if (oldState[i] == FULL) {
         Object element = oldTable[i];
-        int index = indexOfInsertion((T)element);
-        this.table[index] = element;
-        this.values[index] = oldValues[i];
-        this.state[index] = FULL;
+        int index = indexOfInsertion((K)element);
+        newTable[index] = element;
+        newValues[index] = oldValues[i];
+        newState[index] = FULL;
       }
     }
   }
@@ -457,12 +434,15 @@ public class OpenObject${valueTypeCap}Ha
    * @param key the key to be removed from the receiver.
    * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
    */
+  @SuppressWarnings("unchecked")
   @Override
-  public boolean removeKey(T key) {
-    int i = indexOfKey(key);
+  public V remove(Object key) {
+    int i = indexOfKey((K)key);
     if (i < 0) {
-      return false;
-    } // key not contained
+      return null;
+    }
+    // key not contained
+    V removed = (V) values[i];
 
     this.state[i] = REMOVED;
     //this.values[i]=0; // delta
@@ -473,7 +453,7 @@ public class OpenObject${valueTypeCap}Ha
       rehash(newCapacity);
     }
 
-    return true;
+    return removed;
   }
 
   /**
@@ -496,7 +476,7 @@ public class OpenObject${valueTypeCap}Ha
     } // open addressing needs at least one FREE slot at any time.
 
     this.table = new Object[capacity];
-    this.values = new ${valueType}[capacity];
+    this.values = new Object[capacity];
     this.state = new byte[capacity];
 
     // memory will be exhausted long before this pathological case happens, anyway.
@@ -533,37 +513,144 @@ public class OpenObject${valueTypeCap}Ha
   }
 
   /**
-   * Fills all values contained in the receiver into the specified list. Fills the list, starting at index 0. After this
-   * call returns the specified list has a new size that equals <tt>this.size()</tt>. 
-   * <p> This method can be used
-   * to iterate over the values of the receiver.
-   *
-   * @param list the list to be filled, can have any size.
-   */
-  @Override
-  public void values(${valueTypeCap}ArrayList list) {
-    list.setSize(distinct);
-    ${valueType}[] elements = list.elements();
-
-    int j = 0;
-    for (int i = state.length; i-- > 0;) {
-      if (state[i] == FULL) {
-        elements[j++] = values[i];
-      }
-    }
-  }
-  
-  /**
    * Access for unit tests.
    * @param capacity
    * @param minLoadFactor
    * @param maxLoadFactor
    */
-  protected void getInternalFactors(int[] capacity, 
+  void getInternalFactors(int[] capacity, 
       double[] minLoadFactor, 
       double[] maxLoadFactor) {
     capacity[0] = table.length;
     minLoadFactor[0] = this.minLoadFactor;
     maxLoadFactor[0] = this.maxLoadFactor;
   }
+
+  private class MapEntry implements Map.Entry<K,V> {
+    private final K key;
+    private final V value;
+    
+    MapEntry(K key, V value) {
+      this.key = key;
+      this.value = value;
+    }
+
+    @Override
+    public K getKey() {
+      return key;
+    }
+
+    @Override
+    public V getValue() {
+      return value;
+    }
+
+    @Override
+    public V setValue(V value) {
+      throw new UnsupportedOperationException("Map.Entry.setValue not supported for OpenHashMap");
+    }
+    
+  }
+
+  /**
+   * Allocate a set to contain Map.Entry objects for the pairs and return it.
+   */
+  @Override
+  public Set<java.util.Map.Entry<K,V>> entrySet() {
+    final Set<Entry<K, V>> entries = new OpenHashSet<Map.Entry<K,V>>();
+    forEachPair(new ObjectObjectProcedure<K,V>() {
+
+      @Override
+      public boolean apply(K key, V value) {
+        entries.add(new MapEntry(key, value));
+        return true;
+      }});
+    return entries;
+  }
+
+  /**
+   * Allocate a set to contain keys and return it.
+   * This violates the 'backing' provisions of the map interface.
+   */
+  @Override
+  public Set<K> keySet() {
+    final Set<K> keys = new OpenHashSet<K>();
+    forEachKey(new ObjectProcedure<K>() {
+
+      @Override
+      public boolean apply(K element) {
+        keys.add(element);
+        return true;
+      }});
+    return keys;
+  }
+
+  @Override
+  public void putAll(Map<? extends K,? extends V> m) {
+    for (Map.Entry<? extends K, ? extends V> e : m.entrySet()) {
+      put(e.getKey(), e.getValue());
+    }
+  }
+
+  /**
+   * Allocate a list to contain the values and return it.
+   * This violates the 'backing' provision of the Map interface.
+   */
+  @Override
+  public Collection<V> values() {
+    final List<V> valueList = new ArrayList<V>();
+    forEachPair(new ObjectObjectProcedure<K,V>() {
+
+      @Override
+      public boolean apply(K key, V value) {
+        valueList.add(value);
+        return true;
+      }});
+    return valueList;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public boolean equals(Object obj) {
+    if (! (obj instanceof OpenHashMap)) {
+      return false;
+    }
+    final OpenHashMap<K,V> o = (OpenHashMap<K,V>) obj;
+    if (o.size() != size()) {
+      return false;
+    }
+    final boolean[] equal = new boolean[1];
+    equal[0] = true;
+    forEachPair(new ObjectObjectProcedure<K,V>() {
+
+      @Override
+      public boolean apply(K key, V value) {
+        Object ov = o.get(key);
+        if (!value.equals(ov)) {
+          equal[0] = false;
+          return false;
+        }
+        return true;
+      }});
+    return equal[0];
+  }
+
+  @Override
+  public String toString() {
+    final StringBuilder sb = new StringBuilder();
+    sb.append('{');
+    forEachPair(new ObjectObjectProcedure<K,V>() {
+
+      @Override
+      public boolean apply(K key, V value) {
+        sb.append('[');
+        sb.append(key);
+        sb.append(" -> ");
+        sb.append(value);
+        sb.append("] ");
+        return true;
+      }});
+    sb.append('}');
+    return sb.toString();
+  }
 }

Added: mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java?rev=1339121&view=auto
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java (added)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java Wed May 16 11:29:08 2012
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.math.map;
+
+import java.util.Arrays;
+
+/**
+ * Not of interest for users; only for implementors of hashtables.
+ * Used to keep hash table capacities prime numbers.
+ *
+ * <p>Choosing prime numbers as hash table capacities is a good idea to keep them working fast,
+ * particularly under hash table expansions.
+ *
+ * <p>However, JDK 1.2, JGL 3.1 and many other toolkits do nothing to keep capacities prime.
+ * This class provides efficient means to choose prime capacities.
+ *
+ * <p>Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 300 int's).
+ * Memory requirements: 1 KB static memory.
+ *
+ */
+public class PrimeFinder {
+
+  /** The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>. */
+  public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
+
+  /**
+   * The prime number list consists of 11 chunks. Each chunk contains prime numbers. A chunk starts with a prime P1. The
+   * next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1. The next element is P3, for which
+   * the same holds with respect to P2, and so on.
+   *
+   * Chunks are chosen such that for any desired capacity >= 1000 the list includes a prime number <= desired capacity *
+   * 1.11 (11%). For any desired capacity >= 200 the list includes a prime number <= desired capacity * 1.16 (16%). For
+   * any desired capacity >= 16 the list includes a prime number <= desired capacity * 1.21 (21%).
+   *
+   * Therefore, primes can be retrieved which are quite close to any desired capacity, which in turn avoids wasting
+   * memory. For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081. So if you need a
+   * prime >= 1040, you will find a prime <= 1040*1.11=1154.
+   *
+   * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0; If your hashtable has such a
+   * growthfactor then, after initially "rounding to a prime" upon hashtable construction, it will later expand to prime
+   * capacities such that there exist no better primes.
+   *
+   * In total these are about 32*10=320 numbers -> 1 KB of static memory needed. If you are stingy, then delete every
+   * second or fourth chunk.
+   */
+
+  private static final int[] primeCapacities = {
+      //chunk #0
+      largestPrime,
+
+      //chunk #1
+      5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, 102877, 205759,
+      411527, 823117, 1646237, 3292489, 6584983, 13169977, 26339969, 52679969, 105359939,
+      210719881, 421439783, 842879579, 1685759167,
+
+      //chunk #2
+      433, 877, 1759, 3527, 7057, 14143, 28289, 56591, 113189, 226379, 452759, 905551, 1811107,
+      3622219, 7244441, 14488931, 28977863, 57955739, 115911563, 231823147, 463646329, 927292699,
+      1854585413,
+
+      //chunk #3
+      953, 1907, 3821, 7643, 15287, 30577, 61169, 122347, 244703, 489407, 978821, 1957651, 3915341,
+      7830701, 15661423, 31322867, 62645741, 125291483, 250582987, 501165979, 1002331963,
+      2004663929,
+
+      //chunk #4
+      1039, 2081, 4177, 8363, 16729, 33461, 66923, 133853, 267713, 535481, 1070981, 2141977, 4283963,
+      8567929, 17135863, 34271747, 68543509, 137087021, 274174111, 548348231, 1096696463,
+
+      //chunk #5
+      31, 67, 137, 277, 557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 143483, 286973, 573953,
+      1147921, 2295859, 4591721, 9183457, 18366923, 36733847, 73467739, 146935499, 293871013,
+      587742049, 1175484103,
+
+      //chunk #6
+      599, 1201, 2411, 4831, 9677, 19373, 38747, 77509, 155027, 310081, 620171, 1240361, 2480729,
+      4961459, 9922933, 19845871, 39691759, 79383533, 158767069, 317534141, 635068283, 1270136683,
+
+      //chunk #7
+      311, 631, 1277, 2557, 5119, 10243, 20507, 41017, 82037, 164089, 328213, 656429, 1312867,
+      2625761, 5251529, 10503061, 21006137, 42012281, 84024581, 168049163, 336098327, 672196673,
+      1344393353,
+
+      //chunk #8
+      3, 7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 87719, 175447, 350899,
+      701819, 1403641, 2807303, 5614657, 11229331, 22458671, 44917381, 89834777, 179669557,
+      359339171, 718678369, 1437356741,
+
+      //chunk #9
+      43, 89, 179, 359, 719, 1439, 2879, 5779, 11579, 23159, 46327, 92657, 185323, 370661, 741337,
+      1482707, 2965421, 5930887, 11861791, 23723597, 47447201, 94894427, 189788857, 379577741,
+      759155483, 1518310967,
+
+      //chunk #10
+      379, 761, 1523, 3049, 6101, 12203, 24407, 48817, 97649, 195311, 390647, 781301, 1562611,
+      3125257, 6250537, 12501169, 25002389, 50004791, 100009607, 200019221, 400038451, 800076929,
+      1600153859
+  };
+
+
+  static { //initializer
+    // The above prime numbers are formatted for human readability.
+    // To find numbers fast, we sort them once and for all.
+
+    Arrays.sort(primeCapacities);
+  }
+
+  /** Makes this class non instantiable, but still let's others inherit from it. */
+  private PrimeFinder() {
+  }
+
+  /**
+   * Returns a prime number which is <code>&gt;= desiredCapacity</code> and very close to <code>desiredCapacity</code>
+   * (within 11% if <code>desiredCapacity &gt;= 1000</code>).
+   *
+   * @param desiredCapacity the capacity desired by the user.
+   * @return the capacity which should be used for a hashtable.
+   */
+  public static int nextPrime(int desiredCapacity) {
+    int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
+    if (i < 0) {
+      // desired capacity not found, choose next prime greater than desired capacity
+      i = -i - 1; // remember the semantics of binarySearch...
+    }
+    return primeCapacities[i];
+  }
+
+  /** Tests correctness. */
+  private static void statistics(int from, int to) {
+    // check that primes contain no accidental errors
+    for (int i = 0; i < primeCapacities.length - 1; i++) {
+      if (primeCapacities[i] >= primeCapacities[i + 1]) {
+        throw new IllegalStateException(
+            "primes are unsorted or contain duplicates; detected at " + i + '@' + primeCapacities[i]);
+      }
+    }
+
+    double accDeviation = 0.0;
+    double maxDeviation = -1.0;
+
+    for (int i = from; i <= to; i++) {
+      int primeCapacity = nextPrime(i);
+      //log.info(primeCapacity);
+      double deviation = (primeCapacity - i) / (double) i;
+
+      if (deviation > maxDeviation) {
+        maxDeviation = deviation;
+      }
+
+      accDeviation += deviation;
+    }
+    long width = 1 + (long) to - (long) from;
+
+    double meanDeviation = accDeviation / width;
+  }
+}