You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/11/23 16:14:38 UTC

svn commit: r883365 [20/47] - in /lucene/mahout/trunk: ./ examples/ matrix/ matrix/src/ matrix/src/main/ matrix/src/main/java/ matrix/src/main/java/org/ matrix/src/main/java/org/apache/ matrix/src/main/java/org/apache/mahout/ matrix/src/main/java/org/a...

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,574 @@
+/*
+Copyright © 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.CharProcedure;
+/**
+ * Resizable list holding <code>char</code> elements; implemented with arrays.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ *
+ * @deprecated until unit tests are in place.
+ *             Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class CharArrayList extends AbstractCharList {
+	/**
+	 * The array buffer into which the elements of the list are stored.
+	 * The capacity of the list is the length of this array buffer.
+	 * @serial
+	 */
+	protected char[] elements;
+/**
+ * Constructs an empty list.
+ */
+public CharArrayList() {
+	// Default: delegate to the capacity constructor with room for 10 elements.
+	this(10);
+}
+/**
+ * Constructs a list containing the specified elements. 
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ * 
+ * @param elements the array backing the constructed list
+ */
+public CharArrayList(char[] elements) {
+	// elements(char[]) stores the array reference itself (no copy) and sets size to its length.
+	elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public CharArrayList(int initialCapacity) {
+	// The array constructor adopts the fresh buffer but also sets size to its full length ...
+	this(new char[initialCapacity]);
+	// ... so reset the size to 0: the capacity stays, the list starts out empty.
+	// NOTE(review): setSizeRaw is inherited; presumably it only updates the size field - confirm.
+	setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(char element) {
+	// overridden for performance only.
+	final boolean bufferFull = (elements.length == size);
+	if (bufferFull) {
+		// no free slot left: make room for at least one more element
+		ensureCapacity(size + 1);
+	}
+	// store at the first free slot and advance the size
+	elements[size++] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver. 
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, char element) {
+	// overridden for performance only.
+	if (index < 0 || index > size) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	ensureCapacity(size + 1);
+
+	// open a one-slot gap at index by moving the tail one position to the right
+	final int tailLength = size - index;
+	System.arraycopy(elements, index, elements, index + 1, tailLength);
+
+	elements[index] = element;
+	size++;
+}
+/**
+ * Searches the receiver for the specified value using
+ * the binary search algorithm.  The receiver <strong>must</strong> be
+ * sorted (as by the sort method) prior to making this call.  If
+ * it is not sorted, the results are undefined: in particular, the call
+ * may enter an infinite loop.  If the receiver contains multiple elements
+ * equal to the specified object, there is no guarantee which instance
+ * will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *	       otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>.  The <i>insertion
+ *	       point</i> is defined as the point at which the value would
+ * 	       be inserted into the receiver: the index of the first
+ *	       element greater than the key, or <tt>receiver.size()</tt>, if all
+ *	       elements in the receiver are less than the specified key.  Note
+ *	       that this guarantees that the return value will be &gt;= 0 if
+ *	       and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(char key, int from, int to) {
+	// search directly on the backing array; the Sorting helper does the actual work
+	final char[] backing = this.elements;
+	return org.apache.mahout.colt.Sorting.binarySearchFromTo(backing, key, from, to);
+}
+/**
+ * Returns a deep copy of the receiver. 
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	// overridden for performance only.
+	final char[] copiedBuffer = elements.clone();
+	final CharArrayList duplicate = new CharArrayList(copiedBuffer);
+	// the array constructor sets size to the buffer length (the capacity); restore the real size
+	duplicate.setSizeRaw(size);
+	return duplicate;
+}
+/**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public CharArrayList copy() {
+	// clone() is declared to return Object; narrow it for the caller's convenience
+	final Object duplicate = clone();
+	return (CharArrayList) duplicate;
+}
+ /**
+ * Sorts the specified range of the receiver into ascending numerical order. 
+ *
+ * The sorting algorithm is a count sort. This algorithm offers guaranteed
+ * <dt>Performance: O(Max(n,max-min+1)).
+ * <dt>Space requirements: int[max-min+1] buffer.
+ * <p>This algorithm is only applicable if max-min+1 is not large!
+ * But if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @param min the smallest element contained in the range.
+ * @param max the largest element contained in the range.
+ */
+protected void countSortFromTo(int from, int to, char min, char max) {
+	if (size == 0) {
+		return;
+	}
+	checkRangeFromTo(from, to, size);
+
+	// one histogram slot per possible value in [min,max]
+	final int width = max - min + 1;
+	final int[] histogram = new int[width];
+	final char[] data = elements;
+	for (int i = from; i <= to; i++) {
+		histogram[data[i] - min]++;
+	}
+
+	// write the values back in ascending order, each repeated as often as it was counted
+	int writePos = from;
+	char value = min;
+	for (int slot = 0; slot < width; slot++, value++) {
+		final int occurrences = histogram[slot];
+		if (occurrences == 1) {
+			data[writePos++] = value;
+		}
+		else if (occurrences > 1) {
+			final int lastPos = writePos + occurrences - 1;
+			fillFromToWith(writePos, lastPos, value);
+			writePos = lastPos + 1;
+		}
+	}
+}
+/**
+ * Returns the elements currently stored, including invalid elements between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the elements currently stored.
+ */
+public char[] elements() {
+	// Hands out the internal buffer itself, not a copy; slots at index >= size are unused capacity.
+	return elements;
+}
+/**
+ * Sets the receiver's elements to be the specified array (not a copy of it).
+ *
+ * The size and capacity of the list is the length of the array.
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractCharList elements(char[] elements) {
+	// Adopt the caller's array as the backing buffer (no copy is made).
+	this.elements=elements;
+	// Every slot of the adopted array counts as a valid element.
+	this.size=elements.length;
+	return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	// the helper returns the array to use from now on (presumably the same one if it is already large enough)
+	final char[] buffer = org.apache.mahout.colt.Arrays.ensureCapacity(elements, minCapacity);
+	elements = buffer;
+}
+/**
+ * Compares the specified Object with the receiver.  
+ * Returns true if and only if the specified Object is also an ArrayList of the same type, both Lists have the
+ * same size, and all corresponding pairs of elements in the two Lists are identical.
+ * In other words, two Lists are defined to be equal if they contain the
+ * same elements in the same order.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) { //delta
+	// overridden for performance only.
+	// Identity is the cheapest test, so do it first.
+	if (this == otherObj) return true;
+	// Anything that is not a CharArrayList is delegated to super.equals. This includes null,
+	// for which instanceof is always false, so no separate null check is needed.
+	if (!(otherObj instanceof CharArrayList)) return super.equals(otherObj);
+
+	CharArrayList other = (CharArrayList) otherObj;
+	final int count = size();
+	if (count != other.size()) return false;
+
+	// Compare only the first size() slots of both buffers; anything beyond is unused capacity.
+	char[] theElements = elements();
+	char[] otherElements = other.elements();
+	for (int i = count; --i >= 0; ) {
+		if (theElements[i] != otherElements[i]) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards.
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all elements were iterated over, <tt>true</tt> otherwise. 
+ */
+public boolean forEach(CharProcedure procedure) {
+	// overridden for performance only.
+	// iterate over a snapshot of buffer reference and size taken at entry
+	final char[] data = elements;
+	final int count = size;
+
+	for (int i = 0; i < count; i++) {
+		final boolean keepGoing = procedure.apply(data[i]);
+		if (!keepGoing) {
+			return false;
+		}
+	}
+	return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public char get(int index) {
+	// overridden for performance only.
+	final boolean inRange = (index >= 0 && index < size);
+	if (!inRange) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public char getQuick(int index) {
+	// No check against size: an index in [size, capacity) silently yields whatever the unused slot holds.
+	return elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(char element, int from, int to) {
+	// overridden for performance only.
+	if (size == 0) {
+		return -1;
+	}
+	checkRangeFromTo(from, to, size);
+
+	// scan left to right so the first match wins
+	final char[] data = elements;
+	int pos = from;
+	while (pos <= to) {
+		if (data[pos] == element) {
+			return pos; //found
+		}
+		pos++;
+	}
+	return -1; //not found
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(char element, int from, int to) {
+	// overridden for performance only.
+	if (size == 0) {
+		return -1;
+	}
+	checkRangeFromTo(from, to, size);
+
+	// scan right to left so the last match wins
+	final char[] data = elements;
+	int pos = to;
+	while (pos >= from) {
+		if (data[pos] == element) {
+			return pos; //found
+		}
+		pos--;
+	}
+	return -1; //not found
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractCharList partFromTo(int from, int to) {
+	if (size == 0) {
+		return new CharArrayList(0);
+	}
+	checkRangeFromTo(from, to, size);
+
+	// copy the inclusive range into a buffer of exactly that length
+	final int length = to - from + 1;
+	final char[] part = new char[length];
+	System.arraycopy(elements, from, part, 0, length);
+	return new CharArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractCharList other) {
+	// overridden for performance only.
+	if (!(other instanceof CharArrayList)) {
+		return super.removeAll(other);
+	}
+	if (other.size() == 0) {
+		return false; //nothing to do
+	}
+
+	final int lastOtherIndex = other.size() - 1;
+	final char[] data = elements;
+	final int oldSize = size();
+	int kept = 0; // number of surviving elements, compacted to the front of data
+
+	/* Two ways to test membership in other:
+	   a) linear search with other.indexOf(...):            O(M*N) steps
+	   b) sort a copy of other, then binary search in it:   O(N*logN + M*logN) steps
+	   where M=size and N=other.size.
+	   Use b) if N*logN + M*logN < M*N, otherwise a).
+	*/
+	final double N = (double) other.size();
+	final double M = (double) oldSize;
+	final boolean sortFirst = (N+M) * org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N;
+
+	if (sortFirst) {
+		// it is faster to sort other before searching in it
+		final CharArrayList sortedOther = (CharArrayList) other.clone();
+		sortedOther.quickSort();
+
+		for (int i = 0; i < oldSize; i++) {
+			final char candidate = data[i];
+			if (sortedOther.binarySearchFromTo(candidate, 0, lastOtherIndex) < 0) {
+				data[kept++] = candidate;
+			}
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i = 0; i < oldSize; i++) {
+			final char candidate = data[i];
+			if (other.indexOfFromTo(candidate, 0, lastOtherIndex) < 0) {
+				data[kept++] = candidate;
+			}
+		}
+	}
+
+	final boolean modified = (kept != oldSize);
+	setSize(kept);
+	return modified;
+}
+/**
+ * Replaces a number of elements in the receiver with the same number of elements of another list.
+ * Replaces elements in the receiver, between <code>from</code> (inclusive) and <code>to</code> (inclusive),
+ * with elements of <code>other</code>, starting from <code>otherFrom</code> (inclusive).
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractCharList other, int otherFrom) {
+	// overridden for performance only.
+	if (!(other instanceof CharArrayList)) {
+		// slower
+		super.replaceFromToWithFrom(from, to, other, otherFrom);
+		return;
+	}
+
+	final int length = to - from + 1;
+	if (length <= 0) {
+		return; // empty range: nothing to copy, and no range checks are performed
+	}
+
+	checkRangeFromTo(from, to, size());
+	checkRangeFromTo(otherFrom, otherFrom + length - 1, other.size());
+
+	// both lists are array backed, so a single bulk copy does the job
+	final char[] source = ((CharArrayList) other).elements;
+	System.arraycopy(source, otherFrom, elements, from, length);
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list. 
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractCharList other) {
+	// overridden for performance only.
+	if (!(other instanceof CharArrayList)) {
+		return super.retainAll(other);
+	}
+
+	final int lastOtherIndex = other.size() - 1;
+	final char[] data = elements;
+	final int oldSize = size();
+	int kept = 0; // number of surviving elements, compacted to the front of data
+
+	/* Two ways to test membership in other:
+	   a) linear search with other.indexOf(...):            O(M*N) steps
+	   b) sort a copy of other, then binary search in it:   O(N*logN + M*logN) steps
+	   where M=size and N=other.size.
+	   Use b) if N*logN + M*logN < M*N, otherwise a).
+	*/
+	final double N = (double) other.size();
+	final double M = (double) oldSize;
+	final boolean sortFirst = (N+M) * org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N;
+
+	if (sortFirst) {
+		// it is faster to sort other before searching in it
+		final CharArrayList sortedOther = (CharArrayList) other.clone();
+		sortedOther.quickSort();
+
+		for (int i = 0; i < oldSize; i++) {
+			final char candidate = data[i];
+			if (sortedOther.binarySearchFromTo(candidate, 0, lastOtherIndex) >= 0) {
+				data[kept++] = candidate;
+			}
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i = 0; i < oldSize; i++) {
+			final char candidate = data[i];
+			if (other.indexOfFromTo(candidate, 0, lastOtherIndex) >= 0) {
+				data[kept++] = candidate;
+			}
+		}
+	}
+
+	final boolean modified = (kept != oldSize);
+	setSize(kept);
+	return modified;
+}
+/**
+ * Reverses the elements of the receiver.
+ * Last becomes first, second last becomes second first, and so on.
+ */
+public void reverse() {
+	// overridden for performance only.
+	final char[] data = elements;
+	final int half = size / 2; // with an odd size the middle element stays where it is
+	int left = 0;
+	int right = size - 1;
+
+	while (left < half) {
+		// swap the outermost pair not yet handled, then move both cursors inwards
+		final char held = data[left];
+		data[left++] = data[right];
+		data[right--] = held;
+	}
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public void set(int index, char element) {
+	// overridden for performance only.
+	final boolean inRange = (index >= 0 && index < size);
+	if (!inRange) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	elements[index] = element;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * Provided with invalid parameters this method may access invalid indexes without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, char element) {
+	// No check against size: an index in [size, capacity) silently writes into an unused slot.
+	elements[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive). 
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+	// overridden for performance only.
+	if (size == 0) {
+		return;
+	}
+	checkRangeFromTo(from, to, size);
+
+	// a fresh generator per call, seeded from the current date
+	final org.apache.mahout.jet.random.engine.DRand engine = new org.apache.mahout.jet.random.engine.DRand(new java.util.Date());
+	final org.apache.mahout.jet.random.Uniform gen = new org.apache.mahout.jet.random.Uniform(engine);
+
+	final char[] data = elements;
+	for (int i = from; i < to; i++) {
+		// pick a partner from the not yet fixed part [i,to] and swap it into position i
+		final int pick = gen.nextIntFromTo(i, to);
+
+		final char held = data[pick];
+		data[pick] = data[i];
+		data[i] = held;
+	}
+}
+/**
+ * Sorts the specified range of the receiver into ascending order. 
+ *
+ * The sorting algorithm is dynamically chosen according to the characteristics of the data set.
+ * Currently quicksort and countsort are considered.
+ * Countsort is not always applicable, but if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * <p>Best case performance: O(N).
+ * <dt>Worst case performance: O(N^2) (a degenerated quicksort).
+ * <dt>Best case space requirements: 0 KB. 
+ * <dt>Worst case space requirements: 40 KB.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void sortFromTo(int from, int to) {
+	/*
+	 * Scans the range once for its minimum and maximum and picks the algorithm from that.
+	 * In practice the additional overhead is very small compared to the potential gains.
+	 */
+	final int widthThreshold = 10000; // never consider options resulting in outrageous memory allocations.
+
+	if (size == 0) {
+		return;
+	}
+	checkRangeFromTo(from, to, size);
+
+	// determine minimum and maximum.
+	final char[] data = elements;
+	char min = data[from];
+	char max = min;
+	for (int i = from + 1; i <= to; i++) {
+		final char elem = data[i];
+		if (elem > max) {
+			max = elem;
+		}
+		else if (elem < min) {
+			min = elem;
+		}
+	}
+
+	// try to figure out which option is fastest.
+	final double N = (double)to - (double)from + 1.0;
+	final double quickSortEstimate = N * Math.log(N)/0.6931471805599453; // O(N*log(N,base=2)) ; ln(2)=0.6931471805599453
+
+	final double width = (double)max - (double)min + 1.0;
+	final double countSortEstimate = Math.max(width,N); // O(Max(width,N))
+
+	final boolean useCountSort = width < widthThreshold && countSortEstimate < quickSortEstimate;
+	if (!useCountSort) {
+		quickSortFromTo(from, to);
+		return;
+	}
+	countSortFromTo(from, to, min, max);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the 
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// the helper returns the array to keep (presumably one whose length equals the given size)
+	final char[] current = elements;
+	final int used = size();
+	elements = org.apache.mahout.colt.Arrays.trimToCapacity(current, used);
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,164 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+/**
+ * Resizable compressed list holding numbers; based on the fact that a number from a large list with few distinct values need not take more than <tt>log(distinctValues)</tt> bits; implemented with a <tt>MinMaxNumberList</tt>.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ * <p>
+ * This class can, for example, be useful when making large lists of numbers persistent.
+ * Also useful when very large lists would otherwise consume too much main memory.
+ * <p>
+ * You can add, get and set elements quite similar to <tt>java.util.ArrayList</tt>.
+ * <p>
+ * <b>Applicability:</b> Applicable if data is highly skewed and legal values are known in advance. Robust in the presence of "outliers".
+ * <p>
+ * <b>Performance:</b> Operations <tt>get()</tt>, <tt>size()</tt> and <tt>clear()</tt> are <tt>O(1)</tt>, i.e. run in constant time.
+ * Operations like <tt>add()</tt> and <tt>set()</tt> are <tt>O(log(distinctValues.length))</tt>.
+ * <p>
+ * Upon instantiation a contract is signed that defines the distinct values allowed to be held in this list.
+ * It is not legal to store elements other than specified by the contract.
+ * Any attempt to violate the contract will throw an <tt>IllegalArgumentException</tt>.
+ * <p>
+ * Although access methods are only defined on <tt>long</tt> values you can also store
+ * all other primitive data types: <tt>boolean</tt>, <tt>byte</tt>, <tt>short</tt>, <tt>int</tt>, <tt>long</tt>, <tt>float</tt>, <tt>double</tt> and <tt>char</tt>.
+ * You can do this by explicitly representing them as <tt>long</tt> values.
+ * Use casts for discrete data types.
+ * Use the methods of <tt>java.lang.Float</tt> and <tt>java.lang.Double</tt> for floating point data types:
+ * Recall that with those methods you can convert any floating point value to a <tt>long</tt> value and back <b>without losing any precision</b>:
+ * <p>
+ * <b>Example usage:</b><pre>
+ * DistinctNumberList list = ... instantiation goes here
+ * double d1 = 1.234;
+ * list.add(Double.doubleToLongBits(d1));
+ * double d2 = Double.longBitsToDouble(list.get(0));
+ * if (d1!=d2) System.out.println("This is impossible!");
+ *
+ * DistinctNumberList list2 = ... instantiation goes here
+ * float f1 = 1.234f;
+ * list2.add((long) Float.floatToIntBits(f1));
+ * float f2 = Float.intBitsToFloat((int)list2.get(0));
+ * if (f1!=f2) System.out.println("This is impossible!");
+ * </pre>
+ *
+ * @see LongArrayList
+ * @see MinMaxNumberList
+ * @see java.lang.Float
+ * @see java.lang.Double
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ *
+ * @deprecated until unit tests are in place.
+ *             Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class DistinctNumberList extends org.apache.mahout.colt.list.AbstractLongList {
+	protected long[] distinctValues;
+	protected MinMaxNumberList elements;
+/**
+ * Constructs an empty list with the specified initial capacity and the specified distinct values allowed to be held in this list.
+ *
+ * @param   distinctValues   an array sorted ascending containing the distinct values allowed to be held in this list.
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public DistinctNumberList(long[] distinctValues, int initialCapacity) {
+	// All initialisation lives in setUp: it stores the value table and creates the code list.
+	setUp(distinctValues,initialCapacity);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(long element) {
+	//overridden for performance only.
+	// store the element's position in distinctValues rather than the element itself
+	final int code = codeOf(element);
+	elements.add(code);
+	size++;
+}
+/**
+ * Returns the code that shall be stored for the given element.
+ */
+protected int codeOf(long element) {
+	int index = java.util.Arrays.binarySearch(distinctValues,element);
+	if (index<0) throw new IllegalArgumentException("Element="+element+" not contained in distinct elements.");
+	return index;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	// Storage is owned by the underlying code list, so the request is simply forwarded to it.
+	elements.ensureCapacity(minCapacity);
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public long getQuick(int index) {
+	// the code list holds positions into distinctValues; translate the stored code back into its value
+	final int code = (int) elements.getQuick(index);
+	return distinctValues[code];
+}
+/**
+ * Removes from the receiver all elements whose index is between
+ * <code>from</code>, inclusive and <code>to</code>, inclusive.  Shifts any succeeding
+ * elements to the left (reduces their index).
+ * This call shortens the list by <tt>(to - from + 1)</tt> elements.
+ *
+ * @param from index of first element to be removed.
+ * @param to index of last element to be removed.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void removeFromTo(int from, int to) {
+	// remove the codes from the underlying list, then shrink this list's own size by the same amount
+	elements.removeFromTo(from, to);
+	final int removed = to - from + 1;
+	size -= removed;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may access invalid indexes without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, long element) {
+	// codeOf rejects values outside the table with an IllegalArgumentException
+	final int code = codeOf(element);
+	elements.setQuick(index, code);
+}
+/**
+ * Sets the size of the receiver without modifying it otherwise.
+ * This method should not release or allocate new memory but simply set some instance variable like <tt>size</tt>.
+ */
+protected void setSizeRaw(int newSize) {
+	// Apply the new size to this list ...
+	super.setSizeRaw(newSize);
+	// ... and to the underlying code list, so both report the same size.
+	elements.setSizeRaw(newSize);
+}
+/**
+ * Sets the receiver to an empty list with the specified initial capacity and the specified distinct values allowed to be held in this list.
+ *
+ * @param   distinctValues   an array sorted ascending containing the distinct values allowed to be held in this list.
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+protected void setUp(long[] distinctValues, int initialCapacity) {
+	this.distinctValues = distinctValues;
+	//java.util.Arrays.sort(this.distinctElements);
+	this.elements = new MinMaxNumberList(0,distinctValues.length-1,initialCapacity);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. An application can use this operation to minimize the 
+ * storage of the receiver. 
+ */
+public void trimToSize() {
+	// Storage is owned by the underlying code list, so the request is simply forwarded to it.
+	elements.trimToSize();
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,487 @@
+/*
+Copyright © 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.DoubleProcedure;
+/**
+ * Resizable list holding <code>double</code> elements; implemented with arrays.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ *
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class DoubleArrayList extends AbstractDoubleList {
+	/**
+	 * The array buffer into which the elements of the list are stored.
+	 * The capacity of the list is the length of this array buffer.
+	 * @serial
+	 */
+	protected double[] elements;
+/**
+ * Constructs an empty list.
+ */
+public DoubleArrayList() {
+	// Default initial capacity is 10; the int constructor leaves the list empty.
+	this(10);
+}
+/**
+ * Constructs a list containing the specified elements. 
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ * 
+ * @param elements the array that backs the constructed list
+ */
+public DoubleArrayList(double[] elements) {
+	// Adopt the given array through the elements(double[]) setter (no copy is made here).
+	elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public DoubleArrayList(int initialCapacity) {
+	// Allocate the buffer via the array constructor ...
+	this(new double[initialCapacity]);
+	// ... then reset the size to 0 so the new list starts out empty.
+	setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(double element) {
+	// Specialised append: grow the buffer only when it is completely full.
+	if (elements.length == size) {
+		ensureCapacity(size + 1);
+	}
+	elements[size++] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver. 
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, double element) {
+	// Inserting right behind the last element is a plain append.
+	if (index == size) {
+		add(element);
+		return;
+	}
+	if (index < 0 || index > size) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	ensureCapacity(size + 1);
+	// Open a one-slot gap at index by shifting the tail one position to the right.
+	final int tailLength = size - index;
+	System.arraycopy(elements, index, elements, index + 1, tailLength);
+	elements[index] = element;
+	size++;
+}
+/**
+ * Searches the receiver for the specified value using
+ * the binary search algorithm.  The receiver <strong>must</strong> be
+ * sorted (as by the sort method) prior to making this call.  If
+ * it is not sorted, the results are undefined: in particular, the call
+ * may enter an infinite loop.  If the receiver contains multiple elements
+ * equal to the specified object, there is no guarantee which instance
+ * will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *	       otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>.  The <i>insertion
+ *	       point</i> is defined as the point at which the value would
+ * 	       be inserted into the receiver: the index of the first
+ *	       element greater than the key, or <tt>receiver.size()</tt>, if all
+ *	       elements in the receiver are less than the specified key.  Note
+ *	       that this guarantees that the return value will be &gt;= 0 if
+ *	       and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(double key, int from, int to) {
+	// Search directly on the backing array; the range is handed through unchanged.
+	final double[] buffer = this.elements;
+	return org.apache.mahout.colt.Sorting.binarySearchFromTo(buffer, key, from, to);
+}
+/**
+ * Returns a deep copy of the receiver. 
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	// overridden for performance only.
+	DoubleArrayList clone = new DoubleArrayList((double[]) elements.clone());
+	clone.setSizeRaw(size);
+	return clone;
+}
+/**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public DoubleArrayList copy() {
+	return (DoubleArrayList) clone();
+}
+/**
+ * Returns the elements currently stored, including invalid elements between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the elements currently stored.
+ */
+public double[] elements() {
+	// Hands out the live backing array itself, not a copy.
+	return this.elements;
+}
+/**
+ * Sets the receiver's elements to be the specified array (not a copy of it).
+ *
+ * The size and capacity of the list is the length of the array.
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractDoubleList elements(double[] elements) {
+	// Adopt the caller's array as the backing store (no copy) ...
+	this.elements = elements;
+	// ... and treat every slot of it as a valid element.
+	size = elements.length;
+	return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	// The helper returns the array to use from now on; store whatever it returns.
+	final double[] adjusted = org.apache.mahout.colt.Arrays.ensureCapacity(this.elements, minCapacity);
+	this.elements = adjusted;
+}
+/**
+ * Compares the specified Object with the receiver.  
+ * Returns true if and only if the specified Object is also an ArrayList of the same type, both Lists have the
+ * same size, and all corresponding pairs of elements in the two Lists are identical.
+ * In other words, two Lists are defined to be equal if they contain the
+ * same elements in the same order.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) {
+	// Specialised fast path for two DoubleArrayLists; everything else goes to the superclass.
+	if (this == otherObj) return true;
+	// null fails the instanceof test as well, so it is handled by super.equals();
+	// a separate null check after this line would be unreachable.
+	if (!(otherObj instanceof DoubleArrayList)) return super.equals(otherObj);
+
+	DoubleArrayList other = (DoubleArrayList) otherObj;
+	final int count = size();
+	if (count != other.size()) return false;
+
+	// Only the first size() slots are compared; spare capacity is ignored.
+	// Elements are compared with the primitive != operator, so a NaN element never
+	// matches (not even another NaN) while 0.0 and -0.0 are considered equal.
+	double[] theElements = elements();
+	double[] otherElements = other.elements();
+	for (int i = count - 1; i >= 0; i--) {
+		if (theElements[i] != otherElements[i]) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards.
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all elements were iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEach(DoubleProcedure procedure) {
+	// Work on local snapshots of the buffer and the size.
+	final double[] buffer = elements;
+	final int count = size;
+
+	for (int i = 0; i < count; i++) {
+		// Abort as soon as the procedure asks to stop.
+		if (!procedure.apply(buffer[i])) {
+			return false;
+		}
+	}
+	return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public double get(int index) {
+	// Bounds are checked against the logical size, not the buffer capacity.
+	if (index < 0 || index >= size) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public double getQuick(int index) {
+	// Deliberately unchecked against size; only the array's own bounds check applies.
+	return this.elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(double element, int from, int to) {
+	// An empty receiver cannot contain anything; the range is not validated in that case.
+	if (size == 0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	final double[] buffer = elements;
+	int i = from;
+	while (i <= to) {
+		if (buffer[i] == element) return i; // first match wins
+		i++;
+	}
+	return -1; // no match in [from, to]
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(double element, int from, int to) {
+	// An empty receiver cannot contain anything; the range is not validated in that case.
+	if (size == 0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	final double[] buffer = elements;
+	int i = to;
+	while (i >= from) {
+		if (buffer[i] == element) return i; // rightmost match wins
+		i--;
+	}
+	return -1; // no match in [from, to]
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractDoubleList partFromTo(int from, int to) {
+	// An empty receiver always yields an empty list, whatever range was requested.
+	if (size == 0) return new DoubleArrayList(0);
+
+	checkRangeFromTo(from, to, size);
+
+	// Copy the inclusive range into an exactly-sized array and wrap it.
+	final int length = to - from + 1;
+	double[] part = new double[length];
+	System.arraycopy(elements, from, part, 0, length);
+	return new DoubleArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractDoubleList other) {
+	// Specialised for DoubleArrayList arguments; anything else uses the generic version.
+	if (!(other instanceof DoubleArrayList)) return super.removeAll(other);
+
+	final int otherSize = other.size();
+	if (otherSize == 0) return false; // nothing to remove
+
+	final int lastOther = otherSize - 1;
+	final double[] buffer = elements;
+	final int oldSize = size();
+
+	/* Membership in other can be tested in two ways. With M=size and N=other.size:
+	     a) linear scan per element:          O(M*N)
+	     b) sort a copy of other, then binary
+	        search per element:               O(N*logN + M*logN)
+	   Strategy b) is chosen when (N+M)*log2(N) < M*N.
+	*/
+	final double n = (double) otherSize;
+	final double m = (double) oldSize;
+	final boolean sortFirst = (n + m) * org.apache.mahout.jet.math.Arithmetic.log2(n) < m * n;
+
+	// Survivors are compacted to the front of the buffer; kept counts them.
+	int kept = 0;
+	if (sortFirst) {
+		// Sort a clone so that other itself stays untouched.
+		DoubleArrayList sorted = (DoubleArrayList) other.clone();
+		sorted.quickSort();
+		for (int i = 0; i < oldSize; i++) {
+			final double candidate = buffer[i];
+			if (sorted.binarySearchFromTo(candidate, 0, lastOther) < 0) buffer[kept++] = candidate;
+		}
+	}
+	else {
+		for (int i = 0; i < oldSize; i++) {
+			final double candidate = buffer[i];
+			if (other.indexOfFromTo(candidate, 0, lastOther) < 0) buffer[kept++] = candidate;
+		}
+	}
+
+	setSize(kept);
+	return kept != oldSize;
+}
+/**
+ * Replaces a number of elements in the receiver with the same number of elements of another list.
+ * Replaces elements in the receiver, between <code>from</code> (inclusive) and <code>to</code> (inclusive),
+ * with elements of <code>other</code>, starting from <code>otherFrom</code> (inclusive).
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractDoubleList other, int otherFrom) {
+	// Only a DoubleArrayList source allows a direct array copy; otherwise use the generic version.
+	if (!(other instanceof DoubleArrayList)) {
+		super.replaceFromToWithFrom(from,to,other,otherFrom);
+		return;
+	}
+
+	final int length = to - from + 1;
+	if (length <= 0) return; // empty range: nothing to replace, nothing validated
+
+	checkRangeFromTo(from, to, size());
+	checkRangeFromTo(otherFrom, otherFrom + length - 1, other.size());
+	final double[] source = ((DoubleArrayList) other).elements;
+	System.arraycopy(source, otherFrom, elements, from, length);
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list. 
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractDoubleList other) {
+	// Specialised for DoubleArrayList arguments; anything else uses the generic version.
+	if (!(other instanceof DoubleArrayList)) return super.retainAll(other);
+
+	final int otherSize = other.size();
+	final int lastOther = otherSize - 1;
+	final double[] buffer = elements;
+	final int oldSize = size();
+
+	/* Membership in other can be tested in two ways. With M=size and N=other.size:
+	     a) linear scan per element:          O(M*N)
+	     b) sort a copy of other, then binary
+	        search per element:               O(N*logN + M*logN)
+	   Strategy b) is chosen when (N+M)*log2(N) < M*N.
+	*/
+	final double n = (double) otherSize;
+	final double m = (double) oldSize;
+	final boolean sortFirst = (n + m) * org.apache.mahout.jet.math.Arithmetic.log2(n) < m * n;
+
+	// Elements found in other are compacted to the front of the buffer; kept counts them.
+	int kept = 0;
+	if (sortFirst) {
+		// Sort a clone so that other itself stays untouched.
+		DoubleArrayList sorted = (DoubleArrayList) other.clone();
+		sorted.quickSort();
+		for (int i = 0; i < oldSize; i++) {
+			final double candidate = buffer[i];
+			if (sorted.binarySearchFromTo(candidate, 0, lastOther) >= 0) buffer[kept++] = candidate;
+		}
+	}
+	else {
+		for (int i = 0; i < oldSize; i++) {
+			final double candidate = buffer[i];
+			if (other.indexOfFromTo(candidate, 0, lastOther) >= 0) buffer[kept++] = candidate;
+		}
+	}
+
+	setSize(kept);
+	return kept != oldSize;
+}
+/**
+ * Reverses the elements of the receiver.
+ * Last becomes first, second last becomes second first, and so on.
+ */
+public void reverse() {
+	// Swap from both ends towards the middle; a middle element (odd size) stays put.
+	final double[] buffer = elements;
+	for (int lo = 0, hi = size - 1; lo < hi; lo++, hi--) {
+		final double held = buffer[lo];
+		buffer[lo] = buffer[hi];
+		buffer[hi] = held;
+	}
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public void set(int index, double element) {
+	// Bounds are checked against the logical size, not the buffer capacity.
+	if (index < 0 || index >= size) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	elements[index] = element;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * Provided with invalid parameters this method may access invalid indexes without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, double element) {
+	// Deliberately unchecked against size; only the array's own bounds check applies.
+	this.elements[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive). 
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+	// Nothing to permute in an empty receiver; the range is not validated in that case.
+	if (size == 0) return;
+	checkRangeFromTo(from, to, size);
+
+	// A fresh generator is created on every call, seeded from the current date/time.
+	org.apache.mahout.jet.random.engine.DRand engine = new org.apache.mahout.jet.random.engine.DRand(new java.util.Date());
+	org.apache.mahout.jet.random.Uniform uniform = new org.apache.mahout.jet.random.Uniform(engine);
+
+	final double[] buffer = elements;
+	for (int i = from; i < to; i++) {
+		// Pick a partner from the not-yet-fixed part [i, to] and swap it into position i.
+		final int pick = uniform.nextIntFromTo(i, to);
+		final double held = buffer[pick];
+		buffer[pick] = buffer[i];
+		buffer[i] = held;
+	}
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current 
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the
+ * storage of the receiver.
+ */
+public void trimToSize() {
+	// The helper returns the array to use from now on; store whatever it returns.
+	final double[] trimmed = org.apache.mahout.colt.Arrays.trimToCapacity(this.elements, size());
+	this.elements = trimmed;
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,483 @@
+/*
+Copyright © 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.FloatProcedure;
+/**
+ * Resizable list holding <code>float</code> elements; implemented with arrays.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ *
+ * @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class FloatArrayList extends AbstractFloatList {
+	/**
+	 * The array buffer into which the elements of the list are stored.
+	 * The capacity of the list is the length of this array buffer.
+	 * @serial
+	 */
+	protected float[] elements;
+/**
+ * Constructs an empty list.
+ */
+public FloatArrayList() {
+	// Default initial capacity is 10; the int constructor leaves the list empty.
+	this(10);
+}
+/**
+ * Constructs a list containing the specified elements. 
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ * 
+ * @param elements the array that backs the constructed list
+ */
+public FloatArrayList(float[] elements) {
+	// Adopt the given array through the elements(float[]) setter (no copy is made here).
+	elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param   initialCapacity   the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public FloatArrayList(int initialCapacity) {
+	// Allocate the buffer via the array constructor ...
+	this(new float[initialCapacity]);
+	// ... then reset the size to 0 so the new list starts out empty.
+	setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(float element) {
+	// Specialised append: grow the buffer only when it is completely full.
+	if (elements.length == size) {
+		ensureCapacity(size + 1);
+	}
+	elements[size++] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver. 
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, float element) {
+	// index == size is allowed and means "append".
+	if (index < 0 || index > size) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	ensureCapacity(size + 1);
+	// Open a one-slot gap at index by shifting the tail one position to the right.
+	final int tailLength = size - index;
+	System.arraycopy(elements, index, elements, index + 1, tailLength);
+	elements[index] = element;
+	size++;
+}
+/**
+ * Searches the receiver for the specified value using
+ * the binary search algorithm.  The receiver <strong>must</strong> be
+ * sorted (as by the sort method) prior to making this call.  If
+ * it is not sorted, the results are undefined: in particular, the call
+ * may enter an infinite loop.  If the receiver contains multiple elements
+ * equal to the specified object, there is no guarantee which instance
+ * will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *	       otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>.  The <i>insertion
+ *	       point</i> is defined as the point at which the value would
+ * 	       be inserted into the receiver: the index of the first
+ *	       element greater than the key, or <tt>receiver.size()</tt>, if all
+ *	       elements in the receiver are less than the specified key.  Note
+ *	       that this guarantees that the return value will be &gt;= 0 if
+ *	       and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(float key, int from, int to) {
+	// Search directly on the backing array; the range is handed through unchanged.
+	final float[] buffer = this.elements;
+	return org.apache.mahout.colt.Sorting.binarySearchFromTo(buffer, key, from, to);
+}
+/**
+ * Returns a deep copy of the receiver. 
+ *
+ * @return  a deep copy of the receiver.
+ */
+public Object clone() {
+	// overridden for performance only.
+	FloatArrayList clone = new FloatArrayList((float[]) elements.clone());
+	clone.setSizeRaw(size);
+	return clone;
+}
+/**
+ * Returns a deep copy of the receiver; uses <code>clone()</code> and casts the result.
+ *
+ * @return  a deep copy of the receiver.
+ */
+public FloatArrayList copy() {
+	return (FloatArrayList) clone();
+}
+/**
+ * Returns the elements currently stored, including invalid elements between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the elements currently stored.
+ */
+public float[] elements() {
+	// Hands out the live backing array itself, not a copy.
+	return this.elements;
+}
+/**
+ * Sets the receiver's elements to be the specified array (not a copy of it).
+ *
+ * The size and capacity of the list is the length of the array.
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractFloatList elements(float[] elements) {
+	// Adopt the caller's array as the backing store (no copy) ...
+	this.elements = elements;
+	// ... and treat every slot of it as a valid element.
+	size = elements.length;
+	return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * If necessary, allocates new internal memory and increases the capacity of the receiver.
+ *
+ * @param   minCapacity   the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+	// The helper returns the array to use from now on; store whatever it returns.
+	final float[] adjusted = org.apache.mahout.colt.Arrays.ensureCapacity(this.elements, minCapacity);
+	this.elements = adjusted;
+}
+/**
+ * Compares the specified Object with the receiver.  
+ * Returns true if and only if the specified Object is also an ArrayList of the same type, both Lists have the
+ * same size, and all corresponding pairs of elements in the two Lists are identical.
+ * In other words, two Lists are defined to be equal if they contain the
+ * same elements in the same order.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) {
+	// Specialised fast path for two FloatArrayLists; everything else goes to the superclass.
+	if (this == otherObj) return true;
+	// null fails the instanceof test as well, so it is handled by super.equals();
+	// a separate null check after this line would be unreachable.
+	if (!(otherObj instanceof FloatArrayList)) return super.equals(otherObj);
+
+	FloatArrayList other = (FloatArrayList) otherObj;
+	final int count = size();
+	if (count != other.size()) return false;
+
+	// Only the first size() slots are compared; spare capacity is ignored.
+	// Elements are compared with the primitive != operator, so a NaN element never
+	// matches (not even another NaN) while 0.0f and -0.0f are considered equal.
+	float[] theElements = elements();
+	float[] otherElements = other.elements();
+	for (int i = count - 1; i >= 0; i--) {
+		if (theElements[i] != otherElements[i]) return false;
+	}
+	return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards.
+ * @param procedure    the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues. 
+ * @return <tt>false</tt> if the procedure stopped before all elements were iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEach(FloatProcedure procedure) {
+	// Work on local snapshots of the buffer and the size.
+	final float[] buffer = elements;
+	final int count = size;
+
+	for (int i = 0; i < count; i++) {
+		// Abort as soon as the procedure asks to stop.
+		if (!procedure.apply(buffer[i])) {
+			return false;
+		}
+	}
+	return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @exception IndexOutOfBoundsException index is out of range (index
+ * 		  &lt; 0 || index &gt;= size()).
+ */
+public float get(int index) {
+	// Bounds are checked against the logical size, not the buffer capacity.
+	if (index < 0 || index >= size) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions. 
+ * Provided with invalid parameters this method may return invalid elements without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 && index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ */
+public float getQuick(int index) {
+	// Deliberately unchecked against size; only the array's own bounds check applies.
+	return this.elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(float element, int from, int to) {
+	// An empty receiver cannot contain anything; the range is not validated in that case.
+	if (size == 0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	final float[] buffer = elements;
+	int i = from;
+	while (i <= to) {
+		if (buffer[i] == element) return i; // first match wins
+		i++;
+	}
+	return -1; // no match in [from, to]
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Tests for identity.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return  the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(float element, int from, int to) {
+	// An empty receiver cannot contain anything; the range is not validated in that case.
+	if (size == 0) return -1;
+	checkRangeFromTo(from, to, size);
+
+	final float[] buffer = elements;
+	int i = to;
+	while (i >= from) {
+		if (buffer[i] == element) return i; // rightmost match wins
+		i--;
+	}
+	return -1; // no match in [from, to]
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 && (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractFloatList partFromTo(int from, int to) {
+	// An empty receiver always yields an empty list, whatever range was requested.
+	if (size == 0) return new FloatArrayList(0);
+
+	checkRangeFromTo(from, to, size);
+
+	// Copy the inclusive range into an exactly-sized array and wrap it.
+	final int length = to - from + 1;
+	float[] part = new float[length];
+	System.arraycopy(elements, from, part, 0, length);
+	return new FloatArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractFloatList other) {
+	// overridden for performance only.
+	if (! (other instanceof FloatArrayList))	return super.removeAll(other);
+	
+	/* There are two possibilities to do the thing
+	   a) use other.indexOf(...)
+	   b) sort other, then use other.binarySearch(...)
+	   
+	   Let's try to figure out which one is faster. Let M=size, N=other.size, then
+	   a) takes O(M*N) steps
+	   b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+ 
+	   Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+	*/
+	if (other.size()==0) {return false;} //nothing to do
+	int limit = other.size()-1;
+	int j=0;
+	float[] theElements = elements;
+	int mySize = size();
+
+	double N=(double) other.size();
+	double M=(double) mySize;
+	if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+		// it is faster to sort other before searching in it
+		FloatArrayList sortedList = (FloatArrayList) other.clone();
+		sortedList.quickSort();
+
+		for (int i=0; i<mySize ; i++) {
+			if (sortedList.binarySearchFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i=0; i<mySize ; i++) {
+			if (other.indexOfFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+		}
+	}
+
+	boolean modified = (j!=mySize);
+	setSize(j);
+	return modified;
+}
+/**
+ * Overwrites the receiver's elements between <code>from</code> (inclusive) and <code>to</code> (inclusive)
+ * with equally many elements of <code>other</code>, read starting at <code>otherFrom</code> (inclusive).
+ * The size of the receiver does not change.
+ * Lists that are not a <code>FloatArrayList</code> are handled by the superclass implementation.
+ * For a <code>FloatArrayList</code>, an empty range (<code>to &lt; from</code>) is a no-op and no range check is made.
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding the elements to be copied into the receiver.
+ * @param otherFrom position of the first element within other to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractFloatList other, int otherFrom) {
+	if (other instanceof FloatArrayList) {
+		// Fast path: copy array to array.
+		final int count = to - from + 1;
+		if (count <= 0) {
+			return;
+		}
+		checkRangeFromTo(from, to, size());
+		checkRangeFromTo(otherFrom, otherFrom + count - 1, other.size());
+
+		final float[] source = ((FloatArrayList) other).elements;
+		System.arraycopy(source, otherFrom, elements, from, count);
+	} else {
+		// Generic (slower) path.
+		super.replaceFromToWithFrom(from, to, other, otherFrom);
+	}
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list. 
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractFloatList other) {
+	// overridden for performance only.
+	if (! (other instanceof FloatArrayList))	return super.retainAll(other);
+	
+	/* There are two possibilities to do the thing
+	   a) use other.indexOf(...)
+	   b) sort other, then use other.binarySearch(...)
+	   
+	   Let's try to figure out which one is faster. Let M=size, N=other.size, then
+	   a) takes O(M*N) steps
+	   b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+	   Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+	*/
+	int limit = other.size()-1;
+	int j=0;
+	float[] theElements = elements;
+	int mySize = size();
+
+	double N=(double) other.size();
+	double M=(double) mySize;
+	if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+		// it is faster to sort other before searching in it
+		FloatArrayList sortedList = (FloatArrayList) other.clone();
+		sortedList.quickSort();
+
+		for (int i=0; i<mySize ; i++) {
+			if (sortedList.binarySearchFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+		}
+	}
+	else {
+		// it is faster to search in other without sorting
+		for (int i=0; i<mySize ; i++) {
+			if (other.indexOfFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+		}
+	}
+
+	boolean modified = (j!=mySize);
+	setSize(j);
+	return modified;
+}
+/**
+ * Reverses the order of the receiver's elements in place:
+ * the last element becomes the first, the second to last becomes the second, and so on.
+ */
+public void reverse() {
+	// Two cursors move towards each other and swap until they meet in the middle.
+	final float[] data = elements;
+	for (int lo = 0, hi = size - 1; lo < hi; lo++, hi--) {
+		final float held = data[lo];
+		data[lo] = data[hi];
+		data[hi] = held;
+	}
+}
+/**
+ * Stores the given element at the given position, overwriting the element previously held there.
+ *
+ * @param index position of the element to overwrite.
+ * @param element the value to store at that position.
+ * @exception IndexOutOfBoundsException if the index is out of range
+ * 		  (index &lt; 0 || index &gt;= size()).
+ */
+public void set(int index, float element) {
+	final boolean inRange = index >= 0 && index < size;
+	if (!inRange) {
+		throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+	}
+	elements[index] = element;
+}
+/**
+ * Stores the given element at the given position <b>without checking any precondition</b>.
+ * The index is not compared against <tt>size()</tt>; the value is written straight into the backing array.
+ * <b>Call this method only when the index is known to be within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ *
+ * @param index position of the element to overwrite.
+ * @param element the value to store at that position.
+ */
+public void setQuick(int index, float element) {
+	this.elements[index] = element;
+}
+/**
+ * Randomly rearranges the elements of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive).
+ * An empty receiver is left alone and the range is not checked.
+ *
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+	if (size == 0) {
+		return;
+	}
+	checkRangeFromTo(from, to, size);
+
+	// A fresh generator, seeded from the current date, is built on every call.
+	// NOTE(review): calls made in quick succession may end up with the same seed - confirm against DRand.
+	final org.apache.mahout.jet.random.engine.DRand engine = new org.apache.mahout.jet.random.engine.DRand(new java.util.Date());
+	final org.apache.mahout.jet.random.Uniform uniform = new org.apache.mahout.jet.random.Uniform(engine);
+
+	final float[] data = elements;
+	for (int pos = from; pos < to; pos++) {
+		// Swap position pos with a position drawn from [pos, to].
+		final int pick = uniform.nextIntFromTo(pos, to);
+		final float picked = data[pick];
+		data[pick] = data[pos];
+		data[pos] = picked;
+	}
+}
+/**
+ * Shrinks the capacity of the receiver to its current size, releasing any superfluous internal memory.
+ * An application can use this operation to minimize the storage of the receiver.
+ */
+public void trimToSize() {
+	final int used = size();
+	elements = org.apache.mahout.colt.Arrays.trimToCapacity(elements, used);
+}
+}

Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java
------------------------------------------------------------------------------
    svn:eol-style = native