You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/11/23 16:14:38 UTC
svn commit: r883365 [20/47] - in /lucene/mahout/trunk: ./ examples/ matrix/
matrix/src/ matrix/src/main/ matrix/src/main/java/
matrix/src/main/java/org/ matrix/src/main/java/org/apache/
matrix/src/main/java/org/apache/mahout/ matrix/src/main/java/org/a...
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,574 @@
+/*
+Copyright © 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.CharProcedure;
+/**
+ * Resizable list holding <code>char</code> elements; implemented with arrays.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ *
+ * @deprecated until unit tests are in place.
+ *             Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class CharArrayList extends AbstractCharList {
+ /**
+ * The array buffer into which the elements of the list are stored; slots from index <code>size</code> onwards hold no valid elements.
+ * The capacity of the list is the length of this array buffer.
+ * @serial
+ */
+ protected char[] elements;
+/**
+ * Constructs an empty list with an initial capacity of 10 elements.
+ */
+public CharArrayList() {
+ this(10); // delegates to the capacity-taking constructor
+}
+/**
+ * Creates a list that wraps the given array.
+ * Both the initial size and the capacity equal the array's length.
+ *
+ * <b>WARNING:</b> <b>the array is not copied</b>, which saves time and memory but means the list
+ * and the caller share storage: direct writes to the array afterwards show through in the list.
+ *
+ * @param elements the array that will back the constructed list
+ */
+public CharArrayList(char[] elements) {
+  this.elements(elements);
+}
+/**
+ * Creates an empty list whose backing array has room for <code>initialCapacity</code> elements.
+ *
+ * @param initialCapacity the number of elements the receiver can take before it has to allocate new internal memory.
+ */
+public CharArrayList(int initialCapacity) {
+  this(new char[initialCapacity]); // the array constructor sets size to the array length ...
+  this.setSizeRaw(0);              // ... so reset it: the list starts out empty
+}
+/**
+ * Adds one element behind the current last element.
+ * The backing array is enlarged first if it is already full.
+ *
+ * @param element the value to append.
+ */
+public void add(char element) {
+  // Overrides the inherited version purely for speed.
+  if (elements.length == size) ensureCapacity(size + 1);
+  final int slot = size++;
+  elements[slot] = element;
+}
+/**
+ * Inserts the given element in front of the element currently at <code>index</code>.
+ * That element (if any) and everything after it move one position to the right.
+ *
+ * @param index position before which to insert; legal values are <tt>0 .. size()</tt>.
+ * @param element the value to insert.
+ * @exception IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt; size()</tt>.
+ */
+public void beforeInsert(int index, char element) {
+  // Overrides the inherited version purely for speed.
+  if (index < 0 || index > size) {
+    throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  }
+  ensureCapacity(size + 1);
+  System.arraycopy(elements, index, elements, index + 1, size - index);
+  elements[index] = element;
+  size++;
+}
+/**
+ * Looks up <code>key</code> in the receiver with the binary search algorithm.
+ * The receiver <strong>must</strong> be sorted (as by the sort method) before
+ * this method is called. If it is not sorted, the results are undefined: in
+ * particular, the call may enter an infinite loop. If the receiver contains
+ * several elements equal to the key, there is no guarantee which of them
+ * will be found.
+ * The search itself is delegated to <code>org.apache.mahout.colt.Sorting</code>, operating on the backing array.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ * otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>. The <i>insertion
+ * point</i> is the position at which the value would be inserted into
+ * the receiver: the index of the first element greater than the key, or
+ * <tt>receiver.size()</tt>, if all elements in the receiver are less than
+ * the specified key. Note that this guarantees that the return value will
+ * be &gt;= 0 if and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(char key, int from, int to) {
+  final char[] sortedElements = this.elements;
+  return org.apache.mahout.colt.Sorting.binarySearchFromTo(sortedElements, key, from, to);
+}
+/**
+ * Creates an independent copy of the receiver: the copy gets its own duplicate of the backing array.
+ *
+ * @return a deep copy of the receiver.
+ */
+public Object clone() {
+  final char[] duplicate = elements.clone();
+  final CharArrayList duplicateList = new CharArrayList(duplicate);
+  duplicateList.setSizeRaw(size); // the array constructor set the size to the full array length
+  return duplicateList;
+}
+/**
+ * Convenience form of <code>clone()</code>: the same deep copy, already cast to <code>CharArrayList</code>.
+ *
+ * @return a deep copy of the receiver.
+ */
+public CharArrayList copy() {
+  return (CharArrayList) this.clone();
+}
+ /**
+ * Sorts the range <code>[from, to]</code> of the receiver into ascending numerical order using a count sort.
+ * Does nothing if the receiver is empty.
+ * <p>Performance: O(Max(n,max-min+1)).
+ * <br>Space requirements: an <code>int[max-min+1]</code> buffer.
+ * <p>Only applicable if max-min+1 is not large; if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @param min the smallest element contained in the range.
+ * @param max the largest element contained in the range.
+ */
+protected void countSortFromTo(int from, int to, char min, char max) {
+  if (size == 0) return;
+  checkRangeFromTo(from, to, size);
+
+  // Histogram: occurrences[k] counts how often the value (min + k) occurs in [from, to].
+  final int range = max - min + 1;
+  final int[] occurrences = new int[range];
+  final char[] data = elements;
+  for (int pos = from; pos <= to; pos++) {
+    occurrences[data[pos] - min]++;
+  }
+
+  // Write the values back in ascending order, each repeated as often as it was counted.
+  int writePos = from;
+  char value = min;
+  for (int k = 0; k < range; k++, value++) {
+    final int count = occurrences[k];
+    if (count == 1) {
+      data[writePos++] = value;
+    } else if (count > 1) {
+      final int lastPos = writePos + count - 1;
+      fillFromToWith(writePos, lastPos, value);
+      writePos = lastPos + 1;
+    }
+  }
+}
+/**
+ * Gives direct access to the backing array.
+ * The array may be longer than <code>size()</code>; entries between size and capacity are not valid elements.
+ *
+ * <b>WARNING:</b> <b>the array is not copied</b>; direct writes to the returned array change the contents of this list.
+ *
+ * @return the elements currently stored.
+ */
+public char[] elements() {
+  return this.elements;
+}
+/**
+ * Makes the receiver use the specified array (not a copy of it) as its element storage.
+ * Afterwards both the size and the capacity of the list equal the length of the array.
+ *
+ * <b>WARNING:</b> <b>the array is not copied</b>, which saves time and memory but means that
+ * direct writes to the array afterwards change the contents of this list.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractCharList elements(char[] elements) {
+  this.elements = elements;
+  size = this.elements.length;
+  return this;
+}
+/**
+ * Makes sure the receiver can hold at least <code>minCapacity</code> elements without allocating new internal memory later.
+ * The work is done by <code>org.apache.mahout.colt.Arrays.ensureCapacity</code>, whose result becomes the backing array.
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+  final char[] sufficient = org.apache.mahout.colt.Arrays.ensureCapacity(elements, minCapacity);
+  elements = sufficient;
+}
+/**
+ * Compares the specified Object with the receiver.
+ * Two <code>CharArrayList</code>s are equal if and only if they have the same size and hold
+ * the same elements in the same order; only the first <code>size()</code> slots of the backing
+ * arrays are compared. Any other kind of argument (including <code>null</code>) is handed to
+ * the inherited implementation.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) { //delta
+  // Overrides the inherited version purely for speed.
+  if (this == otherObj) return true;
+  // instanceof is false for null, so no separate null check is needed after this guard.
+  if (!(otherObj instanceof CharArrayList)) return super.equals(otherObj);
+  final CharArrayList other = (CharArrayList) otherObj;
+  if (size() != other.size()) return false;
+
+  final char[] mine = elements();
+  final char[] theirs = other.elements();
+  for (int i = size(); --i >= 0; ) {
+    if (mine[i] != theirs[i]) return false;
+  }
+  return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any, starting at index 0 and moving rightwards.
+ * Iteration stops early as soon as the procedure returns <tt>false</tt>.
+ * @param procedure the procedure to be applied.
+ * @return <tt>false</tt> if the procedure stopped before all elements were iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEach(CharProcedure procedure) {
+  final char[] data = elements; // array and size are read once up front: this override exists for speed only
+  final int count = size;
+  for (int i = 0; i < count; i++) {
+    if (!procedure.apply(data[i])) return false;
+  }
+  return true;
+}
+/**
+ * Returns the element at the specified position in the receiver, after checking the index.
+ *
+ * @param index index of element to return.
+ * @return the element stored at <code>index</code>.
+ * @exception IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt;= size()</tt>.
+ */
+public char get(int index) {
+  // Overrides the inherited version purely for speed.
+  final boolean outOfRange = index < 0 || index >= size;
+  if (outOfRange) throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions.
+ * The index is not compared against <code>size()</code>, so an index between size and capacity
+ * silently yields an invalid element instead of an exception.
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ * @param index index of element to return.
+ */
+public char getQuick(int index) {
+  return this.elements[index];
+}
+/**
+ * Finds the first occurrence of the specified element within a range of the receiver.
+ * The range runs from <code>from</code>, inclusive, to <code>to</code>, inclusive, and is scanned left to right.
+ * Elements are compared with <code>==</code>.
+ * An empty receiver yields <code>-1</code> without any range check.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(char element, int from, int to) {
+  // Overrides the inherited version purely for speed.
+  if (size == 0) return -1;
+  checkRangeFromTo(from, to, size);
+
+  final char[] data = elements;
+  // Walk rightwards until a match is found or the range is exhausted.
+  int pos = from;
+  while (pos <= to && data[pos] != element) pos++;
+  return pos <= to ? pos : -1;
+}
+/**
+ * Finds the last occurrence of the specified element within a range of the receiver.
+ * The range runs from <code>from</code>, inclusive, to <code>to</code>, inclusive, and is scanned right to left.
+ * Elements are compared with <code>==</code>.
+ * An empty receiver yields <code>-1</code> without any range check.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(char element, int from, int to) {
+  // Overrides the inherited version purely for speed.
+  if (size == 0) return -1;
+  checkRangeFromTo(from, to, size);
+
+  final char[] data = elements;
+  // Walk leftwards until a match is found or the range is exhausted.
+  int pos = to;
+  while (pos >= from && data[pos] != element) pos--;
+  return pos >= from ? pos : -1;
+}
+/**
+ * Copies the elements between <code>from</code>, inclusive, and <code>to</code>, inclusive, into a new list; an empty receiver yields a new empty list without any range check.
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractCharList partFromTo(int from, int to) {
+  if (size == 0) return new CharArrayList(0);
+  checkRangeFromTo(from, to, size);
+
+  final int length = to - from + 1;
+  final char[] part = new char[length];
+  System.arraycopy(elements, from, part, 0, length);
+  return new CharArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractCharList other) {
+ // overridden for performance only.
+ if (! (other instanceof CharArrayList)) return super.removeAll(other);
+
+ /* There are two possibilities to do the thing
+ a) use other.indexOf(...)
+ b) sort other, then use other.binarySearch(...)
+
+ Let's try to figure out which one is faster. Let M=size, N=other.size, then
+ a) takes O(M*N) steps
+ b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+ Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+ */
+ if (other.size()==0) {return false;} //nothing to do
+ int limit = other.size()-1;
+ int j=0;
+ char[] theElements = elements;
+ int mySize = size();
+
+ double N=(double) other.size();
+ double M=(double) mySize;
+ if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+ // it is faster to sort other before searching in it
+ CharArrayList sortedList = (CharArrayList) other.clone();
+ sortedList.quickSort();
+
+ for (int i=0; i<mySize ; i++) {
+ if (sortedList.binarySearchFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+ }
+ }
+ else {
+ // it is faster to search in other without sorting
+ for (int i=0; i<mySize ; i++) {
+ if (other.indexOfFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+ }
+ }
+
+ boolean modified = (j!=mySize);
+ setSize(j);
+ return modified;
+}
+/**
+ * Overwrites a range of the receiver with equally many elements taken from another list.
+ * The elements between <code>from</code> (inclusive) and <code>to</code> (inclusive) are replaced
+ * with the elements of <code>other</code> starting at <code>otherFrom</code> (inclusive).
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractCharList other, int otherFrom) {
+  // Overrides the inherited version purely for speed.
+  if (!(other instanceof CharArrayList)) {
+    super.replaceFromToWithFrom(from, to, other, otherFrom); // generic, slower path
+    return;
+  }
+  final int length = to - from + 1;
+  if (length <= 0) return; // empty range: nothing to copy
+  checkRangeFromTo(from, to, size());
+  checkRangeFromTo(otherFrom, otherFrom + length - 1, other.size());
+  // Both lists are array-backed, so the range can be copied in one bulk operation.
+  final char[] source = ((CharArrayList) other).elements;
+  System.arraycopy(source, otherFrom, elements, from, length);
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list.
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractCharList other) {
+ // overridden for performance only.
+ if (! (other instanceof CharArrayList)) return super.retainAll(other);
+
+ /* There are two possibilities to do the thing
+ a) use other.indexOf(...)
+ b) sort other, then use other.binarySearch(...)
+
+ Let's try to figure out which one is faster. Let M=size, N=other.size, then
+ a) takes O(M*N) steps
+ b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+ Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+ */
+ int limit = other.size()-1;
+ int j=0;
+ char[] theElements = elements;
+ int mySize = size();
+
+ double N=(double) other.size();
+ double M=(double) mySize;
+ if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+ // it is faster to sort other before searching in it
+ CharArrayList sortedList = (CharArrayList) other.clone();
+ sortedList.quickSort();
+
+ for (int i=0; i<mySize ; i++) {
+ if (sortedList.binarySearchFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+ }
+ }
+ else {
+ // it is faster to search in other without sorting
+ for (int i=0; i<mySize ; i++) {
+ if (other.indexOfFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+ }
+ }
+
+ boolean modified = (j!=mySize);
+ setSize(j);
+ return modified;
+}
+/**
+ * Reverses the order of the elements in place.
+ * Last becomes first, second last becomes second first, and so on.
+ * With an odd number of elements the middle element stays where it is;
+ * lists with fewer than two elements are left untouched.
+ * Only the first <code>size</code> slots of the backing array take part.
+ */
+public void reverse() {
+  // Overrides the inherited version purely for speed.
+  final char[] data = elements;
+  // Swap the outermost pair, then move both cursors inwards until they meet.
+  for (int left = 0, right = size - 1; left < right; left++, right--) {
+    final char swapped = data[left];
+    data[left] = data[right];
+    data[right] = swapped;
+  }
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element, after checking the index.
+ * The size of the receiver does not change.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException if <tt>index &lt; 0 || index &gt;= size()</tt>.
+ */
+public void set(int index, char element) {
+  // Overrides the inherited version purely for speed.
+  final boolean outOfRange = index < 0 || index >= size;
+  if (outOfRange) throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  elements[index] = element;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * The index is not compared against <code>size()</code>, so an index between size and capacity
+ * is written to silently instead of raising an exception.
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, char element) {
+  this.elements[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive); does nothing if the receiver is empty.
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+  // Overrides the inherited version purely for speed.
+  if (size == 0) return;
+  checkRangeFromTo(from, to, size);
+
+  // A fresh random engine is constructed from the current date/time on every call.
+  final org.apache.mahout.jet.random.engine.DRand engine = new org.apache.mahout.jet.random.engine.DRand(new java.util.Date());
+  final org.apache.mahout.jet.random.Uniform gen = new org.apache.mahout.jet.random.Uniform(engine);
+  final char[] data = elements;
+
+  // Position i receives a randomly drawn element of [i, to]; the last position needs no draw.
+  for (int i = from; i < to; i++) {
+    final int pick = gen.nextIntFromTo(i, to);
+    final char picked = data[pick];
+    data[pick] = data[i];
+    data[i] = picked;
+  }
+}
+/**
+ * Sorts the specified range of the receiver into ascending order.
+ *
+ * The sorting algorithm is dynamically chosen according to the characteristics of the data set.
+ * Currently quicksort and countsort are considered.
+ * Countsort is not always applicable, but if applicable, it usually outperforms quicksort by a factor of 3-4.
+ *
+ * <p>Best case performance: O(N).
+ * <dt>Worst case performance: O(N^2) (a degenerated quicksort).
+ * <dt>Best case space requirements: 0 KB.
+ * <dt>Worst case space requirements: 40 KB.
+ *
+ * @param from the index of the first element (inclusive) to be sorted.
+ * @param to the index of the last element (inclusive) to be sorted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()>0 && (from<0 || from>to || to>=size())</tt>).
+ */
+public void sortFromTo(int from, int to) {
+ /*
+ * Computes min and max and decides on this basis.
+ * In practice the additional overhead is very small compared to the potential gains.
+ */
+ final int widthThreshold = 10000; // never consider options resulting in outrageous memory allocations.
+
+ if (size==0) return;
+ checkRangeFromTo(from, to, size);
+
+ // determine minimum and maximum.
+ char min=elements[from];
+ char max=elements[from];
+
+ char[] theElements = elements;
+ for (int i=from+1; i<=to; ) {
+ char elem = theElements[i++];
+ if (elem>max) max=elem;
+ else if (elem<min) min=elem;
+ }
+
+ // try to figure out which option is fastest.
+ double N = (double)to - (double)from + 1.0;
+ double quickSortEstimate = N * Math.log(N)/0.6931471805599453; // O(N*log(N,base=2)) ; ln(2)=0.6931471805599453
+
+ double width = (double)max - (double)min + 1.0;
+ double countSortEstimate = Math.max(width,N); // O(Max(width,N))
+
+ if (width < widthThreshold && countSortEstimate < quickSortEstimate) {
+ countSortFromTo(from, to, min, max);
+ }
+ else {
+ quickSortFromTo(from, to);
+ }
+}
+/**
+ * Trims the capacity of the receiver to its current size, releasing any superfluous internal memory.
+ * An application can use this operation to minimize the storage of the receiver.
+ */
+public void trimToSize() {
+  final char[] trimmed = org.apache.mahout.colt.Arrays.trimToCapacity(elements, size());
+  elements = trimmed;
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/CharArrayList.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,164 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+/**
+ * Resizable compressed list holding numbers; based on the fact that a number from a large list with few distinct values need not take more than <tt>log(distinctValues)</tt> bits; implemented with a <tt>MinMaxNumberList</tt>.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ * <p>
+ * This class can, for example, be useful when making large lists of numbers persistent.
+ * Also useful when very large lists would otherwise consume too much main memory.
+ * <p>
+ * You can add, get and set elements quite similar to <tt>java.util.ArrayList</tt>.
+ * <p>
+ * <b>Applicability:</b> Applicable if data is highly skewed and legal values are known in advance. Robust in the presence of "outliers".
+ * <p>
+ * <b>Performance:</b> Operations <tt>get()</tt>, <tt>size()</tt> and <tt>clear()</tt> are <tt>O(1)</tt>, i.e. run in constant time.
+ * Operations like <tt>add()</tt> and <tt>set()</tt> are <tt>O(log(distinctValues.length))</tt>.
+ * <p>
+ * Upon instantiation a contract is signed that defines the distinct values allowed to be held in this list.
+ * It is not legal to store elements other than specified by the contract.
+ * Any attempt to violate the contract will throw an <tt>IllegalArgumentException</tt>.
+ * <p>
+ * Although access methods are only defined on <tt>long</tt> values you can also store
+ * all other primitive data types: <tt>boolean</tt>, <tt>byte</tt>, <tt>short</tt>, <tt>int</tt>, <tt>long</tt>, <tt>float</tt>, <tt>double</tt> and <tt>char</tt>.
+ * You can do this by explicitly representing them as <tt>long</tt> values.
+ * Use casts for discrete data types.
+ * Use the methods of <tt>java.lang.Float</tt> and <tt>java.lang.Double</tt> for floating point data types:
+ * Recall that with those methods you can convert any floating point value to a <tt>long</tt> value and back <b>without losing any precision</b>:
+ * <p>
+ * <b>Example usage:</b><pre>
+ * DistinctNumberList list = ... instantiation goes here
+ * double d1 = 1.234;
+ * list.add(Double.doubleToLongBits(d1));
+ * double d2 = Double.longBitsToDouble(list.get(0));
+ * if (d1!=d2) System.out.println("This is impossible!");
+ *
+ * DistinctNumberList list2 = ... instantiation goes here
+ * float f1 = 1.234f;
+ * list2.add((long) Float.floatToIntBits(f1));
+ * float f2 = Float.intBitsToFloat((int)list2.get(0));
+ * if (f1!=f2) System.out.println("This is impossible!");
+ * </pre>
+ *
+ * @see LongArrayList
+ * @see MinMaxNumberList
+ * @see java.lang.Float
+ * @see java.lang.Double
+ * @author wolfgang.hoschek@cern.ch
+ * @version 1.0, 09/24/99
+ *
+ * @deprecated until unit tests are in place.
+ *             Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class DistinctNumberList extends org.apache.mahout.colt.list.AbstractLongList {
+ protected long[] distinctValues; // the values this list may hold; an element's code is its index in this array
+ protected MinMaxNumberList elements; // the stored codes, one per list position (indexes into distinctValues)
+/**
+ * Constructs an empty list with the specified initial capacity and the specified distinct values allowed to be held in this list.
+ *
+ * @param distinctValues an array sorted ascending containing the distinct values allowed to be held in this list.
+ * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public DistinctNumberList(long[] distinctValues, int initialCapacity) {
+  this.setUp(distinctValues, initialCapacity);
+}
+/**
+ * Appends the specified element to the end of this list; internally only the element's code is stored.
+ *
+ * @param element element to be appended to this list; must be one of the distinct values.
+ */
+public void add(long element) {
+  final int code = codeOf(element); // throws IllegalArgumentException for values outside the contract
+  elements.add(code);
+  size++;
+}
+/**
+ * Returns the code that shall be stored for the given element.
+ */
+protected int codeOf(long element) {
+ int index = java.util.Arrays.binarySearch(distinctValues,element);
+ if (index<0) throw new IllegalArgumentException("Element="+element+" not contained in distinct elements.");
+ return index;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * The request is passed on to the internal list of codes.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+  this.elements.ensureCapacity(minCapacity);
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions.
+ * The code stored at <code>index</code> is read and translated back through <code>distinctValues</code>.
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ * @param index index of element to return.
+ */
+public long getQuick(int index) {
+  final int code = (int) elements.getQuick(index);
+  return distinctValues[code];
+}
+/**
+ * Removes from the receiver all elements whose index is between
+ * <code>from</code>, inclusive and <code>to</code>, inclusive. Shifts any succeeding
+ * elements to the left (reduces their index), shortening the list by <tt>(to - from + 1)</tt> elements.
+ *
+ * @param from index of first element to be removed.
+ * @param to index of last element to be removed.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void removeFromTo(int from, int to) {
+  elements.removeFromTo(from, to);
+  final int removed = to - from + 1;
+  size -= removed;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * The index is passed on unchecked; the element itself is translated to its code first and must be one of the distinct values.
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, long element) {
+  final int code = codeOf(element);
+  elements.setQuick(index, code);
+}
+/**
+ * Sets the size of the receiver without modifying it otherwise; the size of the internal list of codes is set along with it.
+ * This method should not release or allocate new memory but simply set some instance variable like <tt>size</tt>.
+ */
+protected void setSizeRaw(int newSize) {
+  super.setSizeRaw(newSize);
+  this.elements.setSizeRaw(newSize);
+}
+/**
+ * Sets the receiver to an empty list with the specified initial capacity and the specified distinct values allowed to be hold in this list.
+ *
+ * @param distinctValues an array sorted ascending containing the distinct values allowed to be hold in this list.
+ * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+protected void setUp(long[] distinctValues, int initialCapacity) {
+ this.distinctValues = distinctValues;
+ //java.util.Arrays.sort(this.distinctElements);
+ this.elements = new MinMaxNumberList(0,distinctValues.length-1,initialCapacity);
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current size,
+ * by trimming the internal list of codes. An application can use this
+ * operation to minimize the storage of the receiver.
+ */
+public void trimToSize() {
+  this.elements.trimToSize();
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DistinctNumberList.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,487 @@
+/*
+Copyright © 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.DoubleProcedure;
+/**
+ * Resizable list holding <code>double</code> elements; implemented with arrays.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ *
+ * @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class DoubleArrayList extends AbstractDoubleList {
+ /**
+ * The array buffer into which the elements of the list are stored.
+ * The capacity of the list is the length of this array buffer.
+ * @serial
+ */
+ protected double[] elements;
+/**
+ * Constructs an empty list with an initial capacity of 10 elements.
+ */
+public DoubleArrayList() {
+ this(10);
+}
+/**
+ * Constructs a list that is backed by the specified array.
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the array that backs the constructed list.
+ */
+public DoubleArrayList(double[] elements) {
+  this.elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public DoubleArrayList(int initialCapacity) {
+  this(new double[initialCapacity]);
+  // the array constructor treats the whole array as content, so reset the logical size to zero
+  this.setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ * The backing array is grown first when it is completely filled.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(double element) {
+  // overridden for performance only.
+  if (elements.length == size) {
+    ensureCapacity(size + 1);
+  }
+  final int slot = size++;
+  elements[slot] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver.
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements one position to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, double element) {
+  // overridden for performance only.
+  if (index == size) {
+    // inserting behind the last element is a plain append
+    add(element);
+    return;
+  }
+  if (index < 0 || index > size) {
+    throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  }
+  ensureCapacity(size + 1);
+  // open a gap at index by moving the tail one slot to the right
+  final int tailLength = size - index;
+  System.arraycopy(elements, index, elements, index + 1, tailLength);
+  elements[index] = element;
+  size++;
+}
+/**
+ * Searches the receiver for the specified value using the binary search algorithm;
+ * delegates to <tt>Sorting.binarySearchFromTo</tt> on the backing array.
+ * The receiver <strong>must</strong> be sorted (as by the sort method) prior to making this call.
+ * If it is not sorted, the results are undefined. If the receiver contains multiple elements
+ * equal to the specified value, there is no guarantee which instance will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *         otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>. The <i>insertion
+ *         point</i> is defined as the point at which the value would
+ *         be inserted into the receiver: the index of the first
+ *         element greater than the key, or <tt>receiver.size()</tt>, if all
+ *         elements in the receiver are less than the specified key. Note
+ *         that this guarantees that the return value will be &gt;= 0 if
+ *         and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(double key, int from, int to) {
+  final int position = org.apache.mahout.colt.Sorting.binarySearchFromTo(this.elements, key, from, to);
+  return position;
+}
+/**
+ * Returns a deep copy of the receiver.
+ * The backing array is duplicated and the size is carried over; the result is always a plain <tt>DoubleArrayList</tt>.
+ *
+ * @return a deep copy of the receiver.
+ */
+public Object clone() {
+  // overridden for performance only.
+  final double[] bufferCopy = elements.clone();
+  final DoubleArrayList duplicate = new DoubleArrayList(bufferCopy);
+  duplicate.setSizeRaw(size);
+  return duplicate;
+}
+/**
+ * Returns a deep copy of the receiver; a typed convenience wrapper that calls <code>clone()</code> and casts the result.
+ *
+ * @return a deep copy of the receiver.
+ */
+public DoubleArrayList copy() {
+  final Object duplicate = clone();
+  return (DoubleArrayList) duplicate;
+}
+/**
+ * Returns the backing array, including the unused slots between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the array currently backing the receiver.
+ */
+public double[] elements() {
+  return this.elements;
+}
+/**
+ * Makes the specified array (not a copy of it) the backing array of the receiver.
+ * Afterwards both the size and the capacity of the list equal the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractDoubleList elements(double[] elements) {
+  this.elements = elements;
+  final int newSize = elements.length;
+  this.size = newSize;
+  return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * The backing array is replaced by whatever <tt>Arrays.ensureCapacity</tt> returns for it.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+  final double[] sufficient = org.apache.mahout.colt.Arrays.ensureCapacity(this.elements, minCapacity);
+  this.elements = sufficient;
+}
+/**
+ * Compares the specified Object with the receiver.
+ * If the other object is a <tt>DoubleArrayList</tt>, returns true if and only if both lists have the
+ * same size and all corresponding pairs of elements are equal according to the <tt>==</tt> operator;
+ * in other words, if they contain the same elements in the same order.
+ * Any other argument, including <tt>null</tt>, is handed to <tt>super.equals</tt>.
+ * Note that because of <tt>==</tt> semantics two distinct lists holding <tt>NaN</tt> at the same position are not equal.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) { //delta
+  // overridden for performance only.
+  if (this == otherObj) return true;
+  // instanceof is false for null, so no separate null check is needed
+  if (!(otherObj instanceof DoubleArrayList)) return super.equals(otherObj);
+
+  final DoubleArrayList other = (DoubleArrayList) otherObj;
+  final int mySize = size();
+  if (mySize != other.size()) return false;
+
+  final double[] theElements = elements();
+  final double[] otherElements = other.elements();
+  for (int i = 0; i < mySize; i++) {
+    if (theElements[i] != otherElements[i]) return false;
+  }
+  return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards. The backing array and the size are read once before iterating.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all elements were iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEach(DoubleProcedure procedure) {
+  // overridden for performance only.
+  final double[] buffer = elements;
+  final int count = size;
+
+  for (int i = 0; i < count; i++) {
+    final boolean proceed = procedure.apply(buffer[i]);
+    if (!proceed) {
+      return false;
+    }
+  }
+  return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @return the element stored at <tt>index</tt>.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt;= size()</tt>).
+ */
+public double get(int index) {
+  // overridden for performance only.
+  if (index < 0 || index >= size) {
+    throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  }
+  return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions.
+ * The index is only checked against the backing array, not against the size, so an index between size and
+ * capacity returns an invalid element without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ * @return the content of the backing array at <tt>index</tt>.
+ */
+public double getQuick(int index) {
+  return this.elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Elements are compared with <code>==</code>, hence <code>NaN</code> is never found.
+ * An empty receiver yields <code>-1</code> without any range check.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(double element, int from, int to) {
+  // overridden for performance only.
+  if (size == 0) {
+    return -1;
+  }
+  checkRangeFromTo(from, to, size);
+
+  final double[] buffer = elements;
+  int position = from;
+  while (position <= to) {
+    if (buffer[position] == element) {
+      return position; // found
+    }
+    position++;
+  }
+  return -1; // not found
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Elements are compared with <code>==</code>, hence <code>NaN</code> is never found.
+ * An empty receiver yields <code>-1</code> without any range check.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(double element, int from, int to) {
+  // overridden for performance only.
+  if (size == 0) {
+    return -1;
+  }
+  checkRangeFromTo(from, to, size);
+
+  final double[] buffer = elements;
+  int position = to;
+  while (position >= from) {
+    if (buffer[position] == element) {
+      return position; // found
+    }
+    position--;
+  }
+  return -1; // not found
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * The elements are copied into a fresh array; an empty receiver yields a new empty list without any range check.
+ *
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractDoubleList partFromTo(int from, int to) {
+  if (size == 0) {
+    return new DoubleArrayList(0);
+  }
+  checkRangeFromTo(from, to, size);
+
+  final int length = to - from + 1;
+  final double[] part = new double[length];
+  System.arraycopy(elements, from, part, 0, length);
+  return new DoubleArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractDoubleList other) {
+ // overridden for performance only.
+ if (! (other instanceof DoubleArrayList)) return super.removeAll(other);
+
+ /* There are two possibilities to do the thing
+ a) use other.indexOf(...)
+ b) sort other, then use other.binarySearch(...)
+
+ Let's try to figure out which one is faster. Let M=size, N=other.size, then
+ a) takes O(M*N) steps
+ b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+ Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+ */
+ if (other.size()==0) {return false;} //nothing to do
+ int limit = other.size()-1;
+ int j=0;
+ double[] theElements = elements;
+ int mySize = size();
+
+ double N=(double) other.size();
+ double M=(double) mySize;
+ if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+ // it is faster to sort other before searching in it
+ DoubleArrayList sortedList = (DoubleArrayList) other.clone();
+ sortedList.quickSort();
+
+ for (int i=0; i<mySize ; i++) {
+ if (sortedList.binarySearchFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+ }
+ }
+ else {
+ // it is faster to search in other without sorting
+ for (int i=0; i<mySize ; i++) {
+ if (other.indexOfFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+ }
+ }
+
+ boolean modified = (j!=mySize);
+ setSize(j);
+ return modified;
+}
+/**
+ * Replaces a number of elements in the receiver with the same number of elements of another list.
+ * Replaces elements in the receiver, between <code>from</code> (inclusive) and <code>to</code> (inclusive),
+ * with elements of <code>other</code>, starting from <code>otherFrom</code> (inclusive).
+ * If <code>other</code> is a <code>DoubleArrayList</code> and the range is empty (<code>to &lt; from</code>),
+ * nothing happens and no range check is made.
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractDoubleList other, int otherFrom) {
+  // overridden for performance only.
+  if (other instanceof DoubleArrayList) {
+    final int length = to - from + 1;
+    if (length <= 0) {
+      return;
+    }
+    checkRangeFromTo(from, to, size());
+    checkRangeFromTo(otherFrom, otherFrom + length - 1, other.size());
+    final double[] source = ((DoubleArrayList) other).elements;
+    System.arraycopy(source, otherFrom, elements, from, length);
+    return;
+  }
+  // slower
+  super.replaceFromToWithFrom(from, to, other, otherFrom);
+}
+/**
+ * Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+ * In other words, removes from the receiver all of its elements that are not contained in the
+ * specified other list. The retained elements keep their relative order.
+ * If <code>other</code> is not a <code>DoubleArrayList</code> the work is left to the superclass.
+ * If <code>other</code> is empty, the receiver is emptied.
+ *
+ * @param other the other list to test against.
+ * @return <code>true</code> if the receiver changed as a result of the call.
+ */
+public boolean retainAll(AbstractDoubleList other) {
+  // overridden for performance only.
+  if (! (other instanceof DoubleArrayList)) return super.retainAll(other);
+
+  int mySize = size();
+  if (other.size()==0) {
+    // Nothing can be retained. Handled explicitly so that the cost estimate below
+    // never evaluates log2(0) and no search over the empty range [0,-1] is needed.
+    setSize(0);
+    return mySize!=0;
+  }
+
+  /* There are two possibilities to do the thing
+     a) use other.indexOf(...)
+     b) sort other, then use other.binarySearch(...)
+
+     Let's try to figure out which one is faster. Let M=size, N=other.size, then
+     a) takes O(M*N) steps
+     b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+     Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+  */
+  int limit = other.size()-1;
+  int j=0;
+  double[] theElements = elements;
+
+  double N=(double) other.size();
+  double M=(double) mySize;
+  if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+    // it is faster to sort other before searching in it
+    DoubleArrayList sortedList = (DoubleArrayList) other.clone();
+    sortedList.quickSort();
+
+    for (int i=0; i<mySize ; i++) {
+      if (sortedList.binarySearchFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+    }
+  }
+  else {
+    // it is faster to search in other without sorting
+    for (int i=0; i<mySize ; i++) {
+      if (other.indexOfFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+    }
+  }
+
+  boolean modified = (j!=mySize);
+  setSize(j);
+  return modified;
+}
+/**
+ * Reverses the elements of the receiver in place.
+ * Last becomes first, second last becomes second first, and so on.
+ */
+public void reverse() {
+  // overridden for performance only.
+  final double[] buffer = elements;
+  int left = 0;
+  int right = size - 1;
+
+  // walk inwards from both ends, swapping as we go; a middle element stays where it is
+  while (left < right) {
+    final double held = buffer[left];
+    buffer[left] = buffer[right];
+    buffer[right] = held;
+    left++;
+    right--;
+  }
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt;= size()</tt>).
+ */
+public void set(int index, double element) {
+  // overridden for performance only.
+  if (index < 0 || index >= size) {
+    throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  }
+  elements[index] = element;
+}
+/**
+ * Replaces the element at the specified position in the receiver with the specified element; <b>WARNING:</b> Does not check preconditions.
+ * The index is only checked against the backing array, not against the size, so an index between size and
+ * capacity is written to without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, double element) {
+  this.elements[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive).
+ * Every position <code>i</code> in <code>[from,to)</code> is swapped with a randomly chosen position in <code>[i,to]</code>.
+ * A new generator seeded with the current date is created on each call.
+ * NOTE(review): calls made in quick succession may therefore use the same seed - confirm against <tt>DRand</tt>.
+ *
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+  // overridden for performance only.
+  if (size == 0) {
+    return;
+  }
+  checkRangeFromTo(from, to, size);
+
+  final org.apache.mahout.jet.random.engine.DRand engine = new org.apache.mahout.jet.random.engine.DRand(new java.util.Date());
+  final org.apache.mahout.jet.random.Uniform gen = new org.apache.mahout.jet.random.Uniform(engine);
+  final double[] buffer = elements;
+  for (int i = from; i < to; i++) {
+    final int pick = gen.nextIntFromTo(i, to);
+
+    // swap(i, pick)
+    final double held = buffer[pick];
+    buffer[pick] = buffer[i];
+    buffer[i] = held;
+  }
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current
+ * size. Releases any superfluous internal memory. An application can use this operation to minimize the
+ * storage of the receiver.
+ * The backing array is replaced by whatever <tt>Arrays.trimToCapacity</tt> returns for it.
+ */
+public void trimToSize() {
+  this.elements = org.apache.mahout.colt.Arrays.trimToCapacity(this.elements, size());
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/DoubleArrayList.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java?rev=883365&view=auto
==============================================================================
--- lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java (added)
+++ lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java Mon Nov 23 15:14:26 2009
@@ -0,0 +1,483 @@
+/*
+Copyright © 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.colt.list;
+
+import org.apache.mahout.colt.function.FloatProcedure;
+/**
+ * Resizable list holding <code>float</code> elements; implemented with arrays.
+ * First see the <a href="package-summary.html">package summary</a> and javadoc <a href="package-tree.html">tree view</a> to get the broad picture.
+ *
+ * @deprecated until unit tests are in place. Until this time, this class/interface is unsupported.
+ */
+@Deprecated
+public class FloatArrayList extends AbstractFloatList {
+ /**
+ * The array buffer into which the elements of the list are stored.
+ * The capacity of the list is the length of this array buffer.
+ * @serial
+ */
+ protected float[] elements;
+/**
+ * Constructs an empty list with an initial capacity of 10 elements.
+ */
+public FloatArrayList() {
+ this(10);
+}
+/**
+ * Constructs a list that is backed by the specified array.
+ * The initial size and capacity of the list is the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the array that backs the constructed list.
+ */
+public FloatArrayList(float[] elements) {
+  this.elements(elements);
+}
+/**
+ * Constructs an empty list with the specified initial capacity.
+ *
+ * @param initialCapacity the number of elements the receiver can hold without auto-expanding itself by allocating new internal memory.
+ */
+public FloatArrayList(int initialCapacity) {
+  this(new float[initialCapacity]);
+  // the array constructor treats the whole array as content, so reset the logical size to zero
+  this.setSizeRaw(0);
+}
+/**
+ * Appends the specified element to the end of this list.
+ * The backing array is grown first when it is completely filled.
+ *
+ * @param element element to be appended to this list.
+ */
+public void add(float element) {
+  // overridden for performance only.
+  if (elements.length == size) {
+    ensureCapacity(size + 1);
+  }
+  final int slot = size++;
+  elements[slot] = element;
+}
+/**
+ * Inserts the specified element before the specified position into the receiver.
+ * Shifts the element currently at that position (if any) and
+ * any subsequent elements one position to the right.
+ *
+ * @param index index before which the specified element is to be inserted (must be in [0,size]).
+ * @param element element to be inserted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt; size()</tt>).
+ */
+public void beforeInsert(int index, float element) {
+  // overridden for performance only.
+  if (index < 0 || index > size) {
+    throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  }
+  ensureCapacity(size + 1);
+  // open a gap at index by moving the tail (possibly empty) one slot to the right
+  final int tailLength = size - index;
+  System.arraycopy(elements, index, elements, index + 1, tailLength);
+  elements[index] = element;
+  size++;
+}
+/**
+ * Searches the receiver for the specified value using the binary search algorithm;
+ * delegates to <tt>Sorting.binarySearchFromTo</tt> on the backing array.
+ * The receiver <strong>must</strong> be sorted (as by the sort method) prior to making this call.
+ * If it is not sorted, the results are undefined. If the receiver contains multiple elements
+ * equal to the specified value, there is no guarantee which instance will be found.
+ *
+ * @param key the value to be searched for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return index of the search key, if it is contained in the receiver;
+ *         otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>. The <i>insertion
+ *         point</i> is defined as the point at which the value would
+ *         be inserted into the receiver: the index of the first
+ *         element greater than the key, or <tt>receiver.size()</tt>, if all
+ *         elements in the receiver are less than the specified key. Note
+ *         that this guarantees that the return value will be &gt;= 0 if
+ *         and only if the key is found.
+ * @see org.apache.mahout.colt.Sorting
+ * @see java.util.Arrays
+ */
+public int binarySearchFromTo(float key, int from, int to) {
+  final int position = org.apache.mahout.colt.Sorting.binarySearchFromTo(this.elements, key, from, to);
+  return position;
+}
+/**
+ * Returns a deep copy of the receiver.
+ * The backing array is duplicated and the size is carried over; the result is always a plain <tt>FloatArrayList</tt>.
+ *
+ * @return a deep copy of the receiver.
+ */
+public Object clone() {
+  // overridden for performance only.
+  final float[] bufferCopy = elements.clone();
+  final FloatArrayList duplicate = new FloatArrayList(bufferCopy);
+  duplicate.setSizeRaw(size);
+  return duplicate;
+}
+/**
+ * Returns a deep copy of the receiver; a typed convenience wrapper that calls <code>clone()</code> and casts the result.
+ *
+ * @return a deep copy of the receiver.
+ */
+public FloatArrayList copy() {
+  final Object duplicate = clone();
+  return (FloatArrayList) duplicate;
+}
+/**
+ * Returns the backing array, including the unused slots between size and capacity, if any.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the returned array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @return the array currently backing the receiver.
+ */
+public float[] elements() {
+  return this.elements;
+}
+/**
+ * Makes the specified array (not a copy of it) the backing array of the receiver.
+ * Afterwards both the size and the capacity of the list equal the length of the array.
+ *
+ * <b>WARNING:</b> For efficiency reasons and to keep memory usage low, <b>the array is not copied</b>.
+ * So if subsequently you modify the specified array directly via the [] operator, be sure you know what you're doing.
+ *
+ * @param elements the new elements to be stored.
+ * @return the receiver itself.
+ */
+public AbstractFloatList elements(float[] elements) {
+  this.elements = elements;
+  final int newSize = elements.length;
+  this.size = newSize;
+  return this;
+}
+/**
+ * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new internal memory.
+ * The backing array is replaced by whatever <tt>Arrays.ensureCapacity</tt> returns for it.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+public void ensureCapacity(int minCapacity) {
+  final float[] sufficient = org.apache.mahout.colt.Arrays.ensureCapacity(this.elements, minCapacity);
+  this.elements = sufficient;
+}
+/**
+ * Compares the specified Object with the receiver.
+ * If the other object is a <tt>FloatArrayList</tt>, returns true if and only if both lists have the
+ * same size and all corresponding pairs of elements are equal according to the <tt>==</tt> operator;
+ * in other words, if they contain the same elements in the same order.
+ * Any other argument, including <tt>null</tt>, is handed to <tt>super.equals</tt>.
+ * Note that because of <tt>==</tt> semantics two distinct lists holding <tt>NaN</tt> at the same position are not equal.
+ *
+ * @param otherObj the Object to be compared for equality with the receiver.
+ * @return true if the specified Object is equal to the receiver.
+ */
+public boolean equals(Object otherObj) { //delta
+  // overridden for performance only.
+  if (this == otherObj) return true;
+  // instanceof is false for null, so no separate null check is needed
+  if (!(otherObj instanceof FloatArrayList)) return super.equals(otherObj);
+
+  final FloatArrayList other = (FloatArrayList) otherObj;
+  final int mySize = size();
+  if (mySize != other.size()) return false;
+
+  final float[] theElements = elements();
+  final float[] otherElements = other.elements();
+  for (int i = 0; i < mySize; i++) {
+    if (theElements[i] != otherElements[i]) return false;
+  }
+  return true;
+}
+/**
+ * Applies a procedure to each element of the receiver, if any.
+ * Starts at index 0, moving rightwards. The backing array and the size are read once before iterating.
+ *
+ * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise continues.
+ * @return <tt>false</tt> if the procedure stopped before all elements were iterated over, <tt>true</tt> otherwise.
+ */
+public boolean forEach(FloatProcedure procedure) {
+  // overridden for performance only.
+  final float[] buffer = elements;
+  final int count = size;
+
+  for (int i = 0; i < count; i++) {
+    final boolean proceed = procedure.apply(buffer[i]);
+    if (!proceed) {
+      return false;
+    }
+  }
+  return true;
+}
+/**
+ * Returns the element at the specified position in the receiver.
+ *
+ * @param index index of element to return.
+ * @return the element stored at <tt>index</tt>.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>index &lt; 0 || index &gt;= size()</tt>).
+ */
+public float get(int index) {
+  // overridden for performance only.
+  if (index < 0 || index >= size) {
+    throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+  }
+  return elements[index];
+}
+/**
+ * Returns the element at the specified position in the receiver; <b>WARNING:</b> Does not check preconditions.
+ * The index is only checked against the backing array, not against the size, so an index between size and
+ * capacity returns an invalid element without throwing any exception!
+ * <b>You should only use this method when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ *
+ * @param index index of element to return.
+ * @return the content of the backing array at <tt>index</tt>.
+ */
+public float getQuick(int index) {
+  return this.elements[index];
+}
+/**
+ * Returns the index of the first occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches between <code>from</code>, inclusive and <code>to</code>, inclusive.
+ * Elements are compared with <code>==</code>, hence <code>NaN</code> is never found.
+ * An empty receiver yields <code>-1</code> without any range check.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return the index of the first occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int indexOfFromTo(float element, int from, int to) {
+  // overridden for performance only.
+  if (size == 0) {
+    return -1;
+  }
+  checkRangeFromTo(from, to, size);
+
+  final float[] buffer = elements;
+  int position = from;
+  while (position <= to) {
+    if (buffer[position] == element) {
+      return position; // found
+    }
+    position++;
+  }
+  return -1; // not found
+}
+/**
+ * Returns the index of the last occurrence of the specified
+ * element. Returns <code>-1</code> if the receiver does not contain this element.
+ * Searches beginning at <code>to</code>, inclusive until <code>from</code>, inclusive.
+ * Elements are compared with <code>==</code>, hence <code>NaN</code> is never found.
+ * An empty receiver yields <code>-1</code> without any range check.
+ *
+ * @param element element to search for.
+ * @param from the leftmost search position, inclusive.
+ * @param to the rightmost search position, inclusive.
+ * @return the index of the last occurrence of the element in the receiver; returns <code>-1</code> if the element is not found.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public int lastIndexOfFromTo(float element, int from, int to) {
+  // overridden for performance only.
+  if (size == 0) {
+    return -1;
+  }
+  checkRangeFromTo(from, to, size);
+
+  final float[] buffer = elements;
+  int position = to;
+  while (position >= from) {
+    if (buffer[position] == element) {
+      return position; // found
+    }
+    position--;
+  }
+  return -1; // not found
+}
+/**
+ * Returns a new list of the part of the receiver between <code>from</code>, inclusive, and <code>to</code>, inclusive.
+ * The elements are copied into a fresh array; an empty receiver yields a new empty list without any range check.
+ *
+ * @param from the index of the first element (inclusive).
+ * @param to the index of the last element (inclusive).
+ * @return a new list
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public AbstractFloatList partFromTo(int from, int to) {
+  if (size == 0) {
+    return new FloatArrayList(0);
+  }
+  checkRangeFromTo(from, to, size);
+
+  final int length = to - from + 1;
+  final float[] part = new float[length];
+  System.arraycopy(elements, from, part, 0, length);
+  return new FloatArrayList(part);
+}
+/**
+* Removes from the receiver all elements that are contained in the specified list.
+* Tests for identity.
+*
+* @param other the other list.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean removeAll(AbstractFloatList other) {
+ // overridden for performance only.
+ if (! (other instanceof FloatArrayList)) return super.removeAll(other);
+
+ /* There are two possibilities to do the thing
+ a) use other.indexOf(...)
+ b) sort other, then use other.binarySearch(...)
+
+ Let's try to figure out which one is faster. Let M=size, N=other.size, then
+ a) takes O(M*N) steps
+ b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+ Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+ */
+ if (other.size()==0) {return false;} //nothing to do
+ int limit = other.size()-1;
+ int j=0;
+ float[] theElements = elements;
+ int mySize = size();
+
+ double N=(double) other.size();
+ double M=(double) mySize;
+ if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+ // it is faster to sort other before searching in it
+ FloatArrayList sortedList = (FloatArrayList) other.clone();
+ sortedList.quickSort();
+
+ for (int i=0; i<mySize ; i++) {
+ if (sortedList.binarySearchFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+ }
+ }
+ else {
+ // it is faster to search in other without sorting
+ for (int i=0; i<mySize ; i++) {
+ if (other.indexOfFromTo(theElements[i], 0, limit) < 0) theElements[j++]=theElements[i];
+ }
+ }
+
+ boolean modified = (j!=mySize);
+ setSize(j);
+ return modified;
+}
+/**
+ * Overwrites a range of the receiver with an equally long run of elements taken from another list.
+ * The positions <code>from</code> (inclusive) to <code>to</code> (inclusive) of the receiver are replaced
+ * with the elements of <code>other</code> beginning at <code>otherFrom</code> (inclusive).
+ * If the range is empty (<code>to &lt; from</code>) and <code>other</code> is a <code>FloatArrayList</code>, nothing happens.
+ * If <code>other</code> is not a <code>FloatArrayList</code> the call is delegated to the superclass.
+ *
+ * @param from the position of the first element to be replaced in the receiver
+ * @param to the position of the last element to be replaced in the receiver
+ * @param other list holding elements to be copied into the receiver.
+ * @param otherFrom position of first element within other list to be copied.
+ */
+public void replaceFromToWithFrom(int from, int to, AbstractFloatList other, int otherFrom) {
+    // overridden for performance only.
+    if (other instanceof FloatArrayList) {
+        final int count = to - from + 1;
+        if (count <= 0) {
+            return;
+        }
+        // validate both the target range and the source range before copying
+        checkRangeFromTo(from, to, size());
+        checkRangeFromTo(otherFrom, otherFrom + count - 1, other.size());
+        final float[] source = ((FloatArrayList) other).elements;
+        System.arraycopy(source, otherFrom, elements, from, count);
+        return;
+    }
+
+    // slower, generic path
+    super.replaceFromToWithFrom(from, to, other, otherFrom);
+}
+/**
+* Retains (keeps) only the elements in the receiver that are contained in the specified other list.
+* In other words, removes from the receiver all of its elements that are not contained in the
+* specified other list.
+* @param other the other list to test against.
+* @return <code>true</code> if the receiver changed as a result of the call.
+*/
+public boolean retainAll(AbstractFloatList other) {
+ // overridden for performance only.
+ if (! (other instanceof FloatArrayList)) return super.retainAll(other);
+
+ /* There are two possibilities to do the thing
+ a) use other.indexOf(...)
+ b) sort other, then use other.binarySearch(...)
+
+ Let's try to figure out which one is faster. Let M=size, N=other.size, then
+ a) takes O(M*N) steps
+ b) takes O(N*logN + M*logN) steps (sorting is O(N*logN) and binarySearch is O(logN))
+
+ Hence, if N*logN + M*logN < M*N, we use b) otherwise we use a).
+ */
+ int limit = other.size()-1;
+ int j=0;
+ float[] theElements = elements;
+ int mySize = size();
+
+ double N=(double) other.size();
+ double M=(double) mySize;
+ if ( (N+M)* org.apache.mahout.jet.math.Arithmetic.log2(N) < M*N ) {
+ // it is faster to sort other before searching in it
+ FloatArrayList sortedList = (FloatArrayList) other.clone();
+ sortedList.quickSort();
+
+ for (int i=0; i<mySize ; i++) {
+ if (sortedList.binarySearchFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+ }
+ }
+ else {
+ // it is faster to search in other without sorting
+ for (int i=0; i<mySize ; i++) {
+ if (other.indexOfFromTo(theElements[i], 0, limit) >= 0) theElements[j++]=theElements[i];
+ }
+ }
+
+ boolean modified = (j!=mySize);
+ setSize(j);
+ return modified;
+}
+/**
+ * Reverses the order of the elements of the receiver in place.
+ * The last element becomes the first, the second last becomes the second, and so on.
+ */
+public void reverse() {
+    // overridden for performance only.
+    final float[] data = elements;
+    // two indexes converge on the middle; a middle element of an odd-sized list stays put
+    for (int left = 0, right = size - 1; left < right; left++, right--) {
+        final float held = data[left];
+        data[left] = data[right];
+        data[right] = held;
+    }
+}
+/**
+ * Stores the given element at the given position of the receiver, overwriting the previous value.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ * @exception IndexOutOfBoundsException index is out of range
+ *            (<tt>index &lt; 0 || index &gt;= size()</tt>).
+ */
+public void set(int index, float element) {
+    // overridden for performance only.
+    final boolean withinBounds = index >= 0 && index < size;
+    if (!withinBounds) {
+        throw new IndexOutOfBoundsException("Index: "+index+", Size: "+size);
+    }
+    elements[index] = element;
+}
+/**
+ * Stores the given element at the given position of the receiver <b>without checking the index against <code>size()</code></b>.
+ * <b>WARNING:</b> this method writes straight into the backing array; with an invalid index it may touch
+ * a slot outside the valid range of the list without any exception being raised by this method.
+ * <b>Only use it when you are absolutely sure that the index is within bounds.</b>
+ * Precondition (unchecked): <tt>index &gt;= 0 &amp;&amp; index &lt; size()</tt>.
+ *
+ * @param index index of element to replace.
+ * @param element element to be stored at the specified position.
+ */
+public void setQuick(int index, float element) {
+    final float[] data = elements;
+    data[index] = element;
+}
+/**
+ * Randomly permutes the part of the receiver between <code>from</code> (inclusive) and <code>to</code> (inclusive).
+ * Each position in turn is swapped with a position drawn from the span reaching from itself up to <code>to</code>.
+ * A new generator, seeded with the current <code>java.util.Date</code>, is created on every call.
+ * Does nothing if the receiver is empty.
+ *
+ * @param from the index of the first element (inclusive) to be permuted.
+ * @param to the index of the last element (inclusive) to be permuted.
+ * @exception IndexOutOfBoundsException index is out of range (<tt>size()&gt;0 &amp;&amp; (from&lt;0 || from&gt;to || to&gt;=size())</tt>).
+ */
+public void shuffleFromTo(int from, int to) {
+    // overridden for performance only.
+    if (size == 0) {
+        return;
+    }
+    checkRangeFromTo(from, to, size);
+
+    // NOTE(review): seeding from the current Date on each call presumably makes calls in quick
+    // succession produce the same permutation - confirm against DRand's Date constructor.
+    final org.apache.mahout.jet.random.engine.DRand engine =
+        new org.apache.mahout.jet.random.engine.DRand(new java.util.Date());
+    final org.apache.mahout.jet.random.Uniform uniform = new org.apache.mahout.jet.random.Uniform(engine);
+
+    final float[] data = elements;
+    for (int pos = from; pos < to; pos++) {
+        final int pick = uniform.nextIntFromTo(pos, to);
+
+        // exchange the elements at pos and pick
+        final float held = data[pick];
+        data[pick] = data[pos];
+        data[pos] = held;
+    }
+}
+/**
+ * Trims the capacity of the receiver to be the receiver's current size.
+ * Releases any superfluous internal memory. An application can use this operation to minimize the
+ * storage of the receiver.
+ */
+public void trimToSize() {
+    final int currentSize = size();
+    elements = org.apache.mahout.colt.Arrays.trimToCapacity(elements, currentSize);
+}
+}
Propchange: lucene/mahout/trunk/matrix/src/main/java/org/apache/mahout/matrix/list/FloatArrayList.java
------------------------------------------------------------------------------
svn:eol-style = native