You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/12/18 00:22:41 UTC
svn commit: r891983 [44/47] - in /lucene/mahout/trunk: ./ core/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/mahout/clustering/
core/src/main/java/org/apache/mahout/clustering/canopy/
core/src/main/java/org/a...
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SmpBlas.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SmpBlas.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SmpBlas.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SmpBlas.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,407 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.matrix.linalg;
+
+import EDU.oswego.cs.dl.util.concurrent.FJTask;
+
+import org.apache.mahout.math.matrix.DoubleMatrix1D;
+import org.apache.mahout.math.matrix.DoubleMatrix2D;
+
+/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
+@Deprecated
+public class SmpBlas implements Blas {
+
+ /**
+ * The public global parallel blas; initialized via {@link #allocateBlas}. Do not modify this variable via other means
+ * (it is public).
+ */
+ private static Blas smpBlas = SeqBlas.seqBlas;
+
+ private final Blas seqBlas; // blocks are operated on in parallel; for each block this seq algo is used.
+
+ private final Smp smp;
+
+ private final int maxThreads;
+
+ private static final int NN_THRESHOLD = 30000;
+
+ public static Blas getSmpBlas() {
+ return smpBlas;
+ }
+
+ /** Constructs a blas using a maximum of <tt>maxThreads<tt> threads; each executing the given sequential algos. */
+ protected SmpBlas(int maxThreads, Blas seqBlas) {
+ this.seqBlas = seqBlas;
+ this.maxThreads = maxThreads;
+ this.smp = new Smp(maxThreads);
+ //Smp.smp = new Smp(maxThreads);
+ }
+
+ /**
+ * Sets the public global variable <tt>SmpBlas.smpBlas</tt> to a blas using a maximum of <tt>maxThreads</tt> threads,
+ * each executing the given sequential algorithm; <tt>maxThreads</tt> is normally the number of CPUs. Call this method
+ * at the very beginning of your program. Normally there is no need to call this method more than once.
+ *
+ * @param maxThreads the maximum number of threads (= CPUs) to be used
+ * @param seqBlas the sequential blas algorithms to be used on concurrently processed matrix blocks.
+ */
+ public static void allocateBlas(int maxThreads, Blas seqBlas) {
+ if (smpBlas instanceof SmpBlas) { // no need to change anything?
+ SmpBlas s = (SmpBlas) smpBlas;
+ if (s.maxThreads == maxThreads && s.seqBlas == seqBlas) {
+ return;
+ }
+ }
+
+ if (maxThreads <= 1) {
+ smpBlas = seqBlas;
+ } else {
+ smpBlas = new SmpBlas(maxThreads, seqBlas);
+ }
+ }
+
+ @Override
+ public void assign(DoubleMatrix2D A, final org.apache.mahout.math.function.DoubleFunction function) {
+ run(A, false,
+ new Matrix2DMatrix2DFunction() {
+ @Override
+ public double apply(DoubleMatrix2D AA, DoubleMatrix2D BB) {
+ seqBlas.assign(AA, function);
+ return 0;
+ }
+ }
+ );
+ }
+
+ @Override
+ public void assign(DoubleMatrix2D A, DoubleMatrix2D B,
+ final org.apache.mahout.math.function.DoubleDoubleFunction function) {
+ run(A, B, false,
+ new Matrix2DMatrix2DFunction() {
+ @Override
+ public double apply(DoubleMatrix2D AA, DoubleMatrix2D BB) {
+ seqBlas.assign(AA, BB, function);
+ return 0;
+ }
+ }
+ );
+ }
+
+ @Override
+ public double dasum(DoubleMatrix1D x) {
+ return seqBlas.dasum(x);
+ }
+
+ @Override
+ public void daxpy(double alpha, DoubleMatrix1D x, DoubleMatrix1D y) {
+ seqBlas.daxpy(alpha, x, y);
+ }
+
+ @Override
+ public void daxpy(double alpha, DoubleMatrix2D A, DoubleMatrix2D B) {
+ seqBlas.daxpy(alpha, A, B);
+ }
+
+ @Override
+ public void dcopy(DoubleMatrix1D x, DoubleMatrix1D y) {
+ seqBlas.dcopy(x, y);
+ }
+
+ @Override
+ public void dcopy(DoubleMatrix2D A, DoubleMatrix2D B) {
+ seqBlas.dcopy(A, B);
+ }
+
+ @Override
+ public double ddot(DoubleMatrix1D x, DoubleMatrix1D y) {
+ return seqBlas.ddot(x, y);
+ }
+
+ @Override
+ public void dgemm(final boolean transposeA, final boolean transposeB, final double alpha, DoubleMatrix2D A,
+ DoubleMatrix2D B, final double beta, DoubleMatrix2D C) {
+ /*
+ determine how to split and parallelize best into blocks
+ if more B.columns than tasks --> split B.columns, as follows:
+
+ xx|xx|xxx B
+ xx|xx|xxx
+ xx|xx|xxx
+ A
+ xxx xx|xx|xxx C
+ xxx xx|xx|xxx
+ xxx xx|xx|xxx
+ xxx xx|xx|xxx
+ xxx xx|xx|xxx
+
+ if less B.columns than tasks --> split A.rows, as follows:
+
+ xxxxxxx B
+ xxxxxxx
+ xxxxxxx
+ A
+ xxx xxxxxxx C
+ xxx xxxxxxx
+ --- -------
+ xxx xxxxxxx
+ xxx xxxxxxx
+ --- -------
+ xxx xxxxxxx
+
+ */
+ if (transposeA) {
+ dgemm(false, transposeB, alpha, A.viewDice(), B, beta, C);
+ return;
+ }
+ if (transposeB) {
+ dgemm(transposeA, false, alpha, A, B.viewDice(), beta, C);
+ return;
+ }
+ int m = A.rows();
+ int n = A.columns();
+ int p = B.columns();
+
+ if (B.rows() != n) {
+ throw new IllegalArgumentException(
+ "Matrix2D inner dimensions must agree:" + A.toStringShort() + ", " + B.toStringShort());
+ }
+ if (C.rows() != m || C.columns() != p) {
+ throw new IllegalArgumentException(
+ "Incompatibel result matrix: " + A.toStringShort() + ", " + B.toStringShort() + ", " + C.toStringShort());
+ }
+ if (A == C || B == C) {
+ throw new IllegalArgumentException("Matrices must not be identical");
+ }
+
+ long flops = 2L * m * n * p;
+ int noOfTasks = (int) Math.min(flops / 30000, this.maxThreads); // each thread should process at least 30000 flops
+ boolean splitB = (p >= noOfTasks);
+ int width = splitB ? p : m;
+ noOfTasks = Math.min(width, noOfTasks);
+
+ if (noOfTasks < 2) { // parallelization doesn't pay off (too much start up overhead)
+ seqBlas.dgemm(transposeA, transposeB, alpha, A, B, beta, C);
+ return;
+ }
+
+ // set up concurrent tasks
+ int span = width / noOfTasks;
+ final FJTask[] subTasks = new FJTask[noOfTasks];
+ for (int i = 0; i < noOfTasks; i++) {
+ int offset = i * span;
+ if (i == noOfTasks - 1) {
+ span = width - span * i;
+ } // last span may be a bit larger
+
+ final DoubleMatrix2D AA, BB, CC;
+ if (splitB) {
+ // split B along columns into blocks
+ AA = A;
+ BB = B.viewPart(0, offset, n, span);
+ CC = C.viewPart(0, offset, m, span);
+ } else {
+ // split A along rows into blocks
+ AA = A.viewPart(offset, 0, span, n);
+ BB = B;
+ CC = C.viewPart(offset, 0, span, p);
+ }
+
+ subTasks[i] = new FJTask() {
+ @Override
+ public void run() {
+ seqBlas.dgemm(transposeA, transposeB, alpha, AA, BB, beta, CC);
+ //log.info("Hello "+offset);
+ }
+ };
+ }
+
+ // run tasks and wait for completion
+ try {
+ this.smp.taskGroup.invoke(
+ new FJTask() {
+ @Override
+ public void run() {
+ coInvoke(subTasks);
+ }
+ }
+ );
+ } catch (InterruptedException exc) {
+ }
+ }
+
+ @Override
+ public void dgemv(final boolean transposeA, final double alpha, DoubleMatrix2D A, final DoubleMatrix1D x,
+ final double beta, DoubleMatrix1D y) {
+ /*
+ split A, as follows:
+
+ x x
+ x
+ x
+ A
+ xxx x y
+ xxx x
+ --- -
+ xxx x
+ xxx x
+ --- -
+ xxx x
+
+ */
+ if (transposeA) {
+ dgemv(false, alpha, A.viewDice(), x, beta, y);
+ return;
+ }
+ int m = A.rows();
+ int n = A.columns();
+ long flops = 2L * m * n;
+ int noOfTasks = (int) Math.min(flops / 30000, this.maxThreads); // each thread should process at least 30000 flops
+ int width = A.rows();
+ noOfTasks = Math.min(width, noOfTasks);
+
+ if (noOfTasks < 2) { // parallelization doesn't pay off (too much start up overhead)
+ seqBlas.dgemv(transposeA, alpha, A, x, beta, y);
+ return;
+ }
+
+ // set up concurrent tasks
+ int span = width / noOfTasks;
+ final FJTask[] subTasks = new FJTask[noOfTasks];
+ for (int i = 0; i < noOfTasks; i++) {
+ int offset = i * span;
+ if (i == noOfTasks - 1) {
+ span = width - span * i;
+ } // last span may be a bit larger
+
+ // split A along rows into blocks
+ final DoubleMatrix2D AA = A.viewPart(offset, 0, span, n);
+ final DoubleMatrix1D yy = y.viewPart(offset, span);
+
+ subTasks[i] = new FJTask() {
+ @Override
+ public void run() {
+ seqBlas.dgemv(transposeA, alpha, AA, x, beta, yy);
+ //log.info("Hello "+offset);
+ }
+ };
+ }
+
+ // run tasks and wait for completion
+ try {
+ this.smp.taskGroup.invoke(
+ new FJTask() {
+ @Override
+ public void run() {
+ coInvoke(subTasks);
+ }
+ }
+ );
+ } catch (InterruptedException exc) {
+ }
+ }
+
+ @Override
+ public void dger(double alpha, DoubleMatrix1D x, DoubleMatrix1D y, DoubleMatrix2D A) {
+ seqBlas.dger(alpha, x, y, A);
+ }
+
+ @Override
+ public double dnrm2(DoubleMatrix1D x) {
+ return seqBlas.dnrm2(x);
+ }
+
+ @Override
+ public void drot(DoubleMatrix1D x, DoubleMatrix1D y, double c, double s) {
+ seqBlas.drot(x, y, c, s);
+ }
+
+ @Override
+ public void drotg(double a, double b, double[] rotvec) {
+ seqBlas.drotg(a, b, rotvec);
+ }
+
+ @Override
+ public void dscal(double alpha, DoubleMatrix1D x) {
+ seqBlas.dscal(alpha, x);
+ }
+
+ @Override
+ public void dscal(double alpha, DoubleMatrix2D A) {
+ seqBlas.dscal(alpha, A);
+ }
+
+ @Override
+ public void dswap(DoubleMatrix1D x, DoubleMatrix1D y) {
+ seqBlas.dswap(x, y);
+ }
+
+ @Override
+ public void dswap(DoubleMatrix2D A, DoubleMatrix2D B) {
+ seqBlas.dswap(A, B);
+ }
+
+ @Override
+ public void dsymv(boolean isUpperTriangular, double alpha, DoubleMatrix2D A, DoubleMatrix1D x, double beta,
+ DoubleMatrix1D y) {
+ seqBlas.dsymv(isUpperTriangular, alpha, A, x, beta, y);
+ }
+
+ @Override
+ public void dtrmv(boolean isUpperTriangular, boolean transposeA, boolean isUnitTriangular, DoubleMatrix2D A,
+ DoubleMatrix1D x) {
+ seqBlas.dtrmv(isUpperTriangular, transposeA, isUnitTriangular, A, x);
+ }
+
+ @Override
+ public int idamax(DoubleMatrix1D x) {
+ return seqBlas.idamax(x);
+ }
+
+ protected double[] run(DoubleMatrix2D A, DoubleMatrix2D B, boolean collectResults, Matrix2DMatrix2DFunction fun) {
+ DoubleMatrix2D[][] blocks = this.smp.splitBlockedNN(A, B, NN_THRESHOLD, A.rows() * A.columns());
+ //blocks = this.smp.splitStridedNN(A, B, NN_THRESHOLD, A.rows()*A.columns());
+ int b = blocks != null ? blocks[0].length : 1;
+ double[] results = collectResults ? new double[b] : null;
+
+ if (blocks == null) { // too small --> sequential
+ double result = fun.apply(A, B);
+ if (collectResults) {
+ results[0] = result;
+ }
+ return results;
+ } // parallel
+ this.smp.run(blocks[0], blocks[1], results, fun);
+ return results;
+ }
+
+ protected double[] run(DoubleMatrix2D A, boolean collectResults, Matrix2DMatrix2DFunction fun) {
+ DoubleMatrix2D[] blocks = this.smp.splitBlockedNN(A, NN_THRESHOLD, A.rows() * A.columns());
+ //blocks = this.smp.splitStridedNN(A, NN_THRESHOLD, A.rows()*A.columns());
+ int b = blocks != null ? blocks.length : 1;
+ double[] results = collectResults ? new double[b] : null;
+
+ if (blocks == null) { // too small -> sequential
+ double result = fun.apply(A, null);
+ if (collectResults) {
+ results[0] = result;
+ }
+ return results;
+ } // parallel
+ this.smp.run(blocks, null, results, fun);
+ return results;
+ }
+
+ /** Prints various snapshot statistics to System.out; Simply delegates to {@link EDU.oswego.cs.dl.util.concurrent.FJTaskRunnerGroup#stats}. */
+ public void stats() {
+ if (this.smp != null) {
+ this.smp.stats();
+ }
+ }
+
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/SmpBlas.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/package.html?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/package.html (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/package.html Thu Dec 17 23:22:16 2009
@@ -0,0 +1,118 @@
+<HTML>
+<BODY>
+<p>Linear Algebraic matrix computations operating on {@link org.apache.mahout.math.matrix.DoubleMatrix2D}
+ and {@link org.apache.mahout.math.matrix.DoubleMatrix1D}. </p>
+
+<h1><a name="Overview"></a>Overview</h1>
+
+<p>The linalg package provides easy and performant access to compute intensive
+ Linear Algebra. Much functionality is concentrated in class {@link org.apache.mahout.math.matrix.linalg.Algebra}.
+ Five fundamental matrix decompositions, which consist of pairs or triples of
+ matrices, permutation vectors, and the like, produce results in five decomposition
+ classes. These decompositions are accessed by the <tt>Algebra</tt> class
+ to compute solutions of simultaneous linear equations, determinants, inverses
+ and other matrix functions. The five decompositions are </p>
+<ul>
+ <li> Cholesky Decomposition of symmetric, positive definite matrices</li>
+ <li> LU Decomposition (Gaussian elimination) of rectangular matrices</li>
+ <li> QR Decomposition of rectangular matrices</li>
+ <li> Eigenvalue Decomposition of both symmetric and nonsymmetric square matrices</li>
+ <li> Singular Value Decomposition of rectangular matrices</li>
+</ul>
+<h1>Colt and Jama</h1>
+
+<p>This package could only be rolled out easily because it is to a large degree
+ adapted from interfaces and implementations of the Jama matrix package. See
+ the <a href="http://math.nist.gov/javanumerics/jama">Jama homepage</a>. Due
+ credit is given to Joe Hicklin, Cleve Moler, Peter Webb, Ronald F. Boisvert,
+ Bruce Miller, Roldan Pozo and Karin Remington, the Jama authors from <a href="http://www.mathworks.com/">MathWorks</a>
+ and <a
+ href="http://www.nist.gov/">NIST</a>.</p>
+
+<h2>Design Issues</h2>
+
+<p> Jama matrices are of type <tt>Jama.Matrix</tt>, Colt matrices of type <tt>org.apache.mahout.math.matrix.DoubleMatrix1D</tt>,
+ <tt>org.apache.mahout.math.matrix.DoubleMatrix2D</tt> and <tt>org.apache.mahout.math.matrix.DoubleMatrix3D</tt>.
+
+<p><tt>Jama.Matrix</tt> is not a general-purpose array class. It is designed for
+ a single special purpose: Linear algebra. Because of its limited scope, Jama
+ can combine data structure and algorithms in a class <tt>Jama.Matrix</tt>. In
+ contrast, Colt matrices are general-purpose array classes. Since multi-dimensional
+ matrices (arrays) have many applications, of which only one is linear algebra,
+ Colt matrix packages are designed to avoid fat interfaces, yet allow to form
+ the basis on top of which a broad set of functionality and applications can
+ be defined (a similar spirit is used in STL and IBM <a href="http://math.nist.gov/javanumerics/array/">
+ Array</a>). Thus, data structure and special-purpose algorithms are separated.
+ Class <tt>Algebra</tt> works on <tt>DoubleMatrix2D </tt>and contains the operations
+ of <tt>Jama.Matrix</tt>, but holds no data structure. Class <tt>DoubleMatrix2D</tt>
+ contains an efficient and flexible multi-dimensional array data structure, as
+ well as multi-purpose operations, but (almost) no linear algebraic operations.
+
+<p>As a consequence a Colt user initially faces some additional complexity, but
+ after getting used to such a design, will honour the fact that logically related
+ functionality is logically separated. For example, if a user is not interested
+ in Formatting, Sorting, Partitioning, Statistics, etc. he/she does not see this
+ functionality, because it is neither defined in the linalg package nor the matrix
+ package, but somewhere else.
+
+<p>Perhaps more importantly, such a design will scale over time, as more and more
+ functionality from many scientific and engineering domains is added. Also see
+ <a href="../package-summary.html#Algorithms">matrix algorithms</a>.
+
+<h2> Functionality</h2>
+
+<p>All methods of <tt>Jama.Matrix</tt> are provided in <tt>Algebra</tt>, except
+ for some less important convenience methods. Colt matrices (similar to IBM Arrays)
+ are powerful and flexible data structures. Subrange, slice, dice, flip, selection
+ and sort views are available for Colt matrices, but not for Jama matrices. (They
+ are difficult to implement <i>efficiently</i> with Jama matrices, because they
+ internally use <tt>double[][]</tt> arrays).
+
+<h2>Performance</h2>
+
+<p>No extensive performance studies have been carried out so far.<br>
+ Jama matrices weakly encapsulate a normal <tt>double[][]</tt> array. Dense Colt
+ matrices strongly encapsulate a <tt>double[]</tt> array and use some arithmetic
+ to address cells in 2-d. Addressing a cell is more expensive using <tt>double[][]</tt>
+ arrays, due to bounds-checking, null pointer checks, non-contigous memory, and
+ problems that compilers have to optimize such code. Using <tt>double[]</tt>
+ arrays less bounds-checking, less null pointer checks, better cache locality
+ and better compiler optimizations can be seen, often eliminating bounds-checking
+ and null-pointer checks, paving the way for effective pipelining. See the publications
+ of IBM Watson's <a href="http://www.research.ibm.com/ninja/">Ninja project</a>.
+
+<p>To improve performance, matrix computations should use highly optimized kernels
+ in innermost loops. These kernels are not part of class <tt>Algebra</tt>, but
+ part of <tt>DoubleMatrix2D</tt> and <tt>DoubleMatrix1D</tt>. Otherwise they
+ couldn't be fully optimized. For example, with some arithmetic (not exposed
+ to a user), a loop over a 1-d or 2-d matrix can internally reduce cell adressing
+ overhead. Some of the most critical types of (innermost) loop operations have
+ a corresponding optimized method in <tt>DoubleMatrix2D</tt> and <tt>DoubleMatrix1D</tt>.
+ For example, dot products, multiplications, <tt>assign(function)</tt> transforms
+ and <tt>aggregate</tt> methods are such internally specialized kernels. Feedback
+ may result in a few more optimized kernels. Thus, in the name of performance,
+ in a few cases, algorithms and data structure are not completely separeted.
+
+<p>Some internal optimizations have been introduced, in particular for multiplications,
+ the LU-Decomposition and the Cholesky-Decomposition. The other decomposition
+ classes are almost identical to the corresponding Jama classes - as such they
+ are functional but not (yet) particularly efficient.
+
+<p>For small matrices, you may be better off using Sun's Java 3D 1.2, see <a
+ href="http://java.sun.com/products/java-media/3D/1_2_api/j3dguide/AppendixMath.doc.html#47281">javax.vecmath
+ - spec</a> and <a href="http://java.sun.com/products/java-media/3D/1_2_api/j3dapi/javax/vecmath/package-summary.html">javax.vecmath
+ javadoc</a>.
+
+<p><br>
+
+<h2>Volunteers</h2>
+
+<p align="left"><i> We are looking for volunteers! <br>
+ Do you have a background in Matrix Computations?<br>
+</i><i>Do you care about performance and usability? <br>
+ Are you enthusiastic about Open Source? <br>
+ With your help, this package could become better and richer!<br>
+ Contact <a href="mailto:wolfgang.hoschek@cern.ch">wolfgang.hoschek@cern.ch</a>
+ for more info.</i></p>
+</BODY>
+</HTML>
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Formatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Formatter.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Formatter.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Formatter.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,302 @@
+package org.apache.mahout.math.matrix.objectalgo;
+
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+
+import org.apache.mahout.math.matrix.ObjectMatrix1D;
+import org.apache.mahout.math.matrix.ObjectMatrix2D;
+import org.apache.mahout.math.matrix.ObjectMatrix3D;
+import org.apache.mahout.math.matrix.impl.AbstractFormatter;
+import org.apache.mahout.math.matrix.impl.AbstractMatrix1D;
+import org.apache.mahout.math.matrix.impl.AbstractMatrix2D;
+import org.apache.mahout.math.matrix.impl.Former;
+
+/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
+@Deprecated
+public class Formatter extends AbstractFormatter {
+
+ /** Constructs and returns a matrix formatter with alignment <tt>LEFT</tt>. */
+ public Formatter() {
+ this(LEFT);
+ }
+
+ /**
+ * Constructs and returns a matrix formatter.
+ *
+ * @param alignment the given alignment used to align a column.
+ */
+ public Formatter(String alignment) {
+ setAlignment(alignment);
+ }
+
+ /** Converts a given cell to a String; no alignment considered. */
+ @Override
+ protected String form(AbstractMatrix1D matrix, int index, Former formatter) {
+ return this.form((ObjectMatrix1D) matrix, index, formatter);
+ }
+
+ /** Converts a given cell to a String; no alignment considered. */
+ protected String form(ObjectMatrix1D matrix, int index, Former formatter) {
+ Object value = matrix.get(index);
+ if (value == null) {
+ return "";
+ }
+ return String.valueOf(value);
+ }
+
+ /** Returns a string representations of all cells; no alignment considered. */
+ @Override
+ protected String[][] format(AbstractMatrix2D matrix) {
+ return this.format((ObjectMatrix2D) matrix);
+ }
+
+ /** Returns a string representations of all cells; no alignment considered. */
+ protected String[][] format(ObjectMatrix2D matrix) {
+ String[][] strings = new String[matrix.rows()][matrix.columns()];
+ for (int row = matrix.rows(); --row >= 0;) {
+ strings[row] = formatRow(matrix.viewRow(row));
+ }
+ return strings;
+ }
+
+ /**
+ * Returns a string <tt>s</tt> such that <tt>Object[] m = s</tt> is a legal Java statement.
+ *
+ * @param matrix the matrix to format.
+ */
+ public String toSourceCode(ObjectMatrix1D matrix) {
+ Formatter copy = (Formatter) this.clone();
+ copy.setPrintShape(false);
+ copy.setColumnSeparator(", ");
+ String lead = "{";
+ String trail = "};";
+ return lead + copy.toString(matrix) + trail;
+ }
+
+ /**
+ * Returns a string <tt>s</tt> such that <tt>Object[] m = s</tt> is a legal Java statement.
+ *
+ * @param matrix the matrix to format.
+ */
+ public String toSourceCode(ObjectMatrix2D matrix) {
+ Formatter copy = (Formatter) this.clone();
+ String b3 = blanks(3);
+ copy.setPrintShape(false);
+ copy.setColumnSeparator(", ");
+ copy.setRowSeparator("},\n" + b3 + '{');
+ String lead = "{\n" + b3 + '{';
+ String trail = "}\n};";
+ return lead + copy.toString(matrix) + trail;
+ }
+
+ /**
+ * Returns a string <tt>s</tt> such that <tt>Object[] m = s</tt> is a legal Java statement.
+ *
+ * @param matrix the matrix to format.
+ */
+ public String toSourceCode(ObjectMatrix3D matrix) {
+ Formatter copy = (Formatter) this.clone();
+ String b3 = blanks(3);
+ String b6 = blanks(6);
+ copy.setPrintShape(false);
+ copy.setColumnSeparator(", ");
+ copy.setRowSeparator("},\n" + b6 + '{');
+ copy.setSliceSeparator("}\n" + b3 + "},\n" + b3 + "{\n" + b6 + '{');
+ String lead = "{\n" + b3 + "{\n" + b6 + '{';
+ String trail = "}\n" + b3 + "}\n}";
+ return lead + copy.toString(matrix) + trail;
+ }
+
+ /**
+ * Returns a string representation of the given matrix.
+ *
+ * @param matrix the matrix to convert.
+ */
+ @Override
+ protected String toString(AbstractMatrix2D matrix) {
+ return this.toString((ObjectMatrix2D) matrix);
+ }
+
+ /**
+ * Returns a string representation of the given matrix.
+ *
+ * @param matrix the matrix to convert.
+ */
+ public String toString(ObjectMatrix1D matrix) {
+ ObjectMatrix2D easy = matrix.like2D(1, matrix.size());
+ easy.viewRow(0).assign(matrix);
+ return toString(easy);
+ }
+
+ /**
+ * Returns a string representation of the given matrix.
+ *
+ * @param matrix the matrix to convert.
+ */
+ public String toString(ObjectMatrix2D matrix) {
+ return super.toString(matrix);
+ }
+
+ /**
+ * Returns a string representation of the given matrix.
+ *
+ * @param matrix the matrix to convert.
+ */
+ public String toString(ObjectMatrix3D matrix) {
+ StringBuilder buf = new StringBuilder();
+ boolean oldPrintShape = this.printShape;
+ this.printShape = false;
+ for (int slice = 0; slice < matrix.slices(); slice++) {
+ if (slice != 0) {
+ buf.append(sliceSeparator);
+ }
+ buf.append(toString(matrix.viewSlice(slice)));
+ }
+ this.printShape = oldPrintShape;
+ if (printShape) {
+ buf.insert(0, shape(matrix) + '\n');
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Returns a string representation of the given matrix with axis as well as rows and columns labeled. Pass
+ * <tt>null</tt> to one or more parameters to indicate that the corresponding decoration element shall not appear in
+ * the string converted matrix.
+ *
+ * @param matrix The matrix to format.
+ * @param rowNames The headers of all rows (to be put to the left of the matrix).
+ * @param columnNames The headers of all columns (to be put to above the matrix).
+ * @param rowAxisName The label of the y-axis.
+ * @param columnAxisName The label of the x-axis.
+ * @param title The overall title of the matrix to be formatted.
+ * @return the matrix converted to a string.
+ */
+ public String toTitleString(ObjectMatrix2D matrix, String[] rowNames, String[] columnNames, String rowAxisName,
+ String columnAxisName, String title) {
+ if (matrix.size() == 0) {
+ return "Empty matrix";
+ }
+ String oldFormat = this.format;
+ this.format = LEFT;
+
+ int rows = matrix.rows();
+ int columns = matrix.columns();
+
+ // determine how many rows and columns are needed
+ int r = 0;
+ r += (columnNames == null ? 0 : 1);
+ int c = 0;
+ c += (rowNames == null ? 0 : 1);
+ c += (rowAxisName == null ? 0 : 1);
+ c += (rowNames != null || rowAxisName != null ? 1 : 0);
+
+ int height = r + Math.max(rows, rowAxisName == null ? 0 : rowAxisName.length());
+ int width = c + columns;
+
+ // make larger matrix holding original matrix and naming strings
+ ObjectMatrix2D titleMatrix = matrix.like(height, width);
+
+ // insert original matrix into larger matrix
+ titleMatrix.viewPart(r, c, rows, columns).assign(matrix);
+
+ // insert column axis name in leading row
+ if (r > 0) {
+ titleMatrix.viewRow(0).viewPart(c, columns).assign(columnNames);
+ }
+
+ // insert row axis name in leading column
+ if (rowAxisName != null) {
+ String[] rowAxisStrings = new String[rowAxisName.length()];
+ for (int i = rowAxisName.length(); --i >= 0;) {
+ rowAxisStrings[i] = rowAxisName.substring(i, i + 1);
+ }
+ titleMatrix.viewColumn(0).viewPart(r, rowAxisName.length()).assign(rowAxisStrings);
+ }
+ // insert row names in next leading columns
+ if (rowNames != null) {
+ titleMatrix.viewColumn(c - 2).viewPart(r, rows).assign(rowNames);
+ }
+
+ // insert vertical "---------" separator line in next leading column
+ if (c > 0) {
+ titleMatrix.viewColumn(c - 2 + 1).viewPart(0, rows + r).assign("|");
+ }
+
+ // convert the large matrix to a string
+ boolean oldPrintShape = this.printShape;
+ this.printShape = false;
+ String str = toString(titleMatrix);
+ this.printShape = oldPrintShape;
+
+ // insert horizontal "--------------" separator line
+ StringBuilder total = new StringBuilder(str);
+ if (columnNames != null) {
+ int i = str.indexOf(rowSeparator);
+ total.insert(i + 1, repeat('-', i) + rowSeparator);
+ } else if (columnAxisName != null) {
+ int i = str.indexOf(rowSeparator);
+ total.insert(0, repeat('-', i) + rowSeparator);
+ }
+
+ // insert line for column axis name
+ if (columnAxisName != null) {
+ int j = 0;
+ if (c > 0) {
+ j = str.indexOf('|');
+ }
+ String s = blanks(j);
+ if (c > 0) {
+ s += "| ";
+ }
+ s = s + columnAxisName + '\n';
+ total.insert(0, s);
+ }
+
+ // insert title
+ if (title != null) {
+ total.insert(0, title + '\n');
+ }
+
+ this.format = oldFormat;
+
+ return total.toString();
+ }
+
+ /**
+ * Returns a string representation of the given matrix with axis as well as rows and columns labeled. Pass
+ * <tt>null</tt> to one or more parameters to indicate that the corresponding decoration element shall not appear in
+ * the string converted matrix.
+ *
+ * @param matrix The matrix to format.
+ * @param sliceNames The headers of all slices (to be put above each slice).
+ * @param rowNames The headers of all rows (to be put to the left of the matrix).
+ * @param columnNames The headers of all columns (to be put to above the matrix).
+ * @param sliceAxisName The label of the z-axis (to be put above each slice).
+ * @param rowAxisName The label of the y-axis.
+ * @param columnAxisName The label of the x-axis.
+ * @param title The overall title of the matrix to be formatted.
+ * @return the matrix converted to a string.
+ */
+ public String toTitleString(ObjectMatrix3D matrix, String[] sliceNames, String[] rowNames, String[] columnNames,
+ String sliceAxisName, String rowAxisName, String columnAxisName, String title) {
+ if (matrix.size() == 0) {
+ return "Empty matrix";
+ }
+ StringBuilder buf = new StringBuilder();
+ for (int i = 0; i < matrix.slices(); i++) {
+ if (i != 0) {
+ buf.append(sliceSeparator);
+ }
+ buf.append(toTitleString(matrix.viewSlice(i), rowNames, columnNames, rowAxisName, columnAxisName,
+ title + '\n' + sliceAxisName + '=' + sliceNames[i]));
+ }
+ return buf.toString();
+ }
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Formatter.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix1DComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix1DComparator.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix1DComparator.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix1DComparator.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,55 @@
+package org.apache.mahout.math.matrix.objectalgo;
+
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+
+import org.apache.mahout.math.matrix.ObjectMatrix1D;
+
+/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
+@Deprecated
+public interface ObjectMatrix1DComparator {
+
+ /**
+ * Compares its two arguments for order. Returns a negative integer, zero, or a positive integer as the first
+ * argument is less than, equal to, or greater than the second.<p>
+ *
+ * The implementor must ensure that <tt>sgn(compare(x, y)) == -sgn(compare(y, x))</tt> for all <tt>x</tt> and
+ * <tt>y</tt>. (This implies that <tt>compare(x, y)</tt> must throw an exception if and only if <tt>compare(y,
+ * x)</tt> throws an exception.)<p>
+ *
+ * The implementor must also ensure that the relation is transitive: <tt>((compare(x, y)>0) && (compare(y,
+ * z)>0))</tt> implies <tt>compare(x, z)>0</tt>.<p>
+ *
+ * Finally, the implementer must ensure that <tt>compare(x, y)==0</tt> implies that <tt>sgn(compare(x,
+ * z))==sgn(compare(y, z))</tt> for all <tt>z</tt>.<p>
+ *
+ * @return a negative integer, zero, or a positive integer as the first argument is less than, equal to, or greater
+ * than the second.
+ */
+ int compare(ObjectMatrix1D o1, ObjectMatrix1D o2);
+
+ /**
+ * Indicates whether some other object is "equal to" this Comparator. This method must obey the general
+ * contract of <tt>Object.equals(Object)</tt>. Additionally, this method can return <tt>true</tt> <i>only</i> if the
+ * specified Object is also a comparator and it imposes the same ordering as this comparator. Thus,
+ * <code>comp1.equals(comp2)</code> implies that <tt>sgn(comp1.compare(o1, o2))==sgn(comp2.compare(o1, o2))</tt> for
+ * every element <tt>o1</tt> and <tt>o2</tt>.<p>
+ *
+ * Note that it is <i>always</i> safe <i>not</i> to override <tt>Object.equals(Object)</tt>. However, overriding this
+ * method may, in some cases, improve performance by allowing programs to determine that two distinct Comparators
+ * impose the same order.
+ *
+ * @param obj the reference object with which to compare.
+ * @return <code>true</code> only if the specified object is also a comparator and it imposes the same ordering as
+ * this comparator.
+ * @see java.lang.Object#equals(java.lang.Object)
+ * @see java.lang.Object#hashCode()
+ */
+ boolean equals(Object obj);
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix1DComparator.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix2DComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix2DComparator.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix2DComparator.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix2DComparator.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,55 @@
+package org.apache.mahout.math.matrix.objectalgo;
+
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+
+import org.apache.mahout.math.matrix.ObjectMatrix2D;
+
+/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
+@Deprecated
+public interface ObjectMatrix2DComparator {
+
+ /**
+ * Compares its two arguments for order. Returns a negative integer, zero, or a positive integer as the first
+ * argument is less than, equal to, or greater than the second.<p>
+ *
+ * The implementor must ensure that <tt>sgn(compare(x, y)) == -sgn(compare(y, x))</tt> for all <tt>x</tt> and
+ * <tt>y</tt>. (This implies that <tt>compare(x, y)</tt> must throw an exception if and only if <tt>compare(y,
+ * x)</tt> throws an exception.)<p>
+ *
+ * The implementor must also ensure that the relation is transitive: <tt>((compare(x, y)>0) && (compare(y,
+ * z)>0))</tt> implies <tt>compare(x, z)>0</tt>.<p>
+ *
+ * Finally, the implementer must ensure that <tt>compare(x, y)==0</tt> implies that <tt>sgn(compare(x,
+ * z))==sgn(compare(y, z))</tt> for all <tt>z</tt>.<p>
+ *
+ * @return a negative integer, zero, or a positive integer as the first argument is less than, equal to, or greater
+ * than the second.
+ */
+ int compare(ObjectMatrix2D o1, ObjectMatrix2D o2);
+
+ /**
+ * Indicates whether some other object is "equal to" this Comparator. This method must obey the general
+ * contract of <tt>Object.equals(Object)</tt>. Additionally, this method can return <tt>true</tt> <i>only</i> if the
+ * specified Object is also a comparator and it imposes the same ordering as this comparator. Thus,
+ * <code>comp1.equals(comp2)</code> implies that <tt>sgn(comp1.compare(o1, o2))==sgn(comp2.compare(o1, o2))</tt> for
+ * every element <tt>o1</tt> and <tt>o2</tt>.<p>
+ *
+ * Note that it is <i>always</i> safe <i>not</i> to override <tt>Object.equals(Object)</tt>. However, overriding this
+ * method may, in some cases, improve performance by allowing programs to determine that two distinct Comparators
+ * impose the same order.
+ *
+ * @param obj the reference object with which to compare.
+ * @return <code>true</code> only if the specified object is also a comparator and it imposes the same ordering as
+ * this comparator.
+ * @see java.lang.Object#equals(java.lang.Object)
+ * @see java.lang.Object#hashCode()
+ */
+ boolean equals(Object obj);
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/ObjectMatrix2DComparator.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Partitioning.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Partitioning.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Partitioning.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Partitioning.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,170 @@
+package org.apache.mahout.math.matrix.objectalgo;
+
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+
+import org.apache.mahout.math.Swapper;
+import org.apache.mahout.math.function.IntComparator;
+import org.apache.mahout.math.matrix.ObjectMatrix1D;
+import org.apache.mahout.math.matrix.ObjectMatrix2D;
+
+/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
+@Deprecated
+public class Partitioning {
+
+ private Partitioning() {
+ }
+
+ /**
+ * Same as {@link org.apache.mahout.math.Partitioning#partition(int[],int,int,int[],int,int,int[])} except that it
+ * <i>synchronously</i> partitions the rows of the given matrix by the values of the given matrix column; This is
+ * essentially the same as partitioning a list of composite objects by some instance variable; In other words, two
+ * entire rows of the matrix are swapped, whenever two column values indicate so. <p> Let's say, a "row" is an
+ * "object" (tuple, d-dimensional point). A "column" is the list of "object" values of a given variable (field,
+ * dimension). A "matrix" is a list of "objects" (tuples, points). <p> Now, rows (objects, tuples) are partially
+ * sorted according to their values in one given variable (dimension). Two entire rows of the matrix are swapped,
+ * whenever two column values indicate so. <p> Note that arguments are not checked for validity. <p> <b>Example:</b>
+ * <table border="1" cellspacing="0"> <tr nowrap> <td valign="top"><tt>8 x 3 matrix:<br> 23, 22, 21<br> 20, 19, 18<br>
+ * 17, 16, 15<br> 14, 13, 12<br> 11, 10, 9<br> 8, 7, 6<br> 5, 4, 3<br> 2, 1, 0 </tt></td> <td align="left"
+ * valign="top"> <p><tt>column = 0;<br> rowIndexes = {0,1,2,..,matrix.rows()-1}; rowFrom = 0;<br> rowTo =
+ * matrix.rows()-1;<br> splitters = {5,10,12}<br> c = 0; <br> d = splitters.length-1;<br>
+ * partition(matrix,rowIndexes,rowFrom,rowTo,column,splitters,c,d,splitIndexes);<br> ==><br> splitIndexes == {0, 2,
+ * 3}<br> rowIndexes == {7, 6, 5, 4, 0, 1, 2, 3}</tt></p> </td> <td valign="top"> The matrix IS NOT REORDERED.<br>
+ * Here is how it would look<br> like, if it would be reordered<br> accoring to <tt>rowIndexes</tt>.<br> <tt>8 x 3
+ * matrix:<br> 2, 1, 0<br> 5, 4, 3<br> 8, 7, 6<br> 11, 10, 9<br> 23, 22, 21<br> 20, 19, 18<br> 17, 16, 15<br>
+ * 14, 13, 12 </tt></td> </tr> </table>
+ *
+ * @param matrix the matrix to be partitioned.
+ * @param rowIndexes the index of the i-th row; is modified by this method to reflect partitioned indexes.
+ * @param rowFrom the index of the first row (inclusive).
+ * @param rowTo the index of the last row (inclusive).
+ * @param column the index of the column to partition on.
+ * @param splitters the values at which the rows shall be split into intervals. Must be sorted ascending and must
+ * not contain multiple identical values. These preconditions are not checked; be sure that they
+ * are met.
+ * @param splitFrom the index of the first splitter element to be considered.
+ * @param splitTo the index of the last splitter element to be considered. The method considers the splitter
+ * elements <tt>splitters[splitFrom] .. splitters[splitTo]</tt>.
+ * @param splitIndexes a list into which this method fills the indexes of rows delimiting intervals. Upon return
+ * <tt>splitIndexes[splitFrom..splitTo]</tt> will be set accordingly. Therefore, must satisfy
+ * <tt>splitIndexes.length >= splitters.length</tt>.
+ */
+ private static void partition(ObjectMatrix2D matrix, int[] rowIndexes, int rowFrom, int rowTo, int column,
+ final Object[] splitters, int splitFrom, int splitTo, int[] splitIndexes) {
+ if (rowFrom < 0 || rowTo >= matrix.rows() || rowTo >= rowIndexes.length) {
+ throw new IllegalArgumentException();
+ }
+ if (column < 0 || column >= matrix.columns()) {
+ throw new IllegalArgumentException();
+ }
+ if (splitFrom < 0 || splitTo >= splitters.length) {
+ throw new IllegalArgumentException();
+ }
+ if (splitIndexes.length < splitters.length) {
+ throw new IllegalArgumentException();
+ }
+
+ // this one knows how to swap two row indexes (a,b)
+ final int[] g = rowIndexes;
+ Swapper swapper = new Swapper() {
+ @Override
+ public void swap(int b, int c) {
+ int tmp = g[b];
+ g[b] = g[c];
+ g[c] = tmp;
+ }
+ };
+
+ // compare splitter[a] with columnView[rowIndexes[b]]
+ final ObjectMatrix1D columnView = matrix.viewColumn(column);
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ Comparable<Object> av = (Comparable<Object>) (splitters[a]);
+ Comparable<Object> bv = (Comparable<Object>) (columnView.getQuick(g[b]));
+ int r = av.compareTo(bv);
+ return r < 0 ? -1 : (r == 0 ? 0 : 1);
+ }
+ };
+
+ // compare columnView[rowIndexes[a]] with columnView[rowIndexes[b]]
+ IntComparator comp2 = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ Comparable<Object> av = (Comparable<Object>) (columnView.getQuick(g[a]));
+ Comparable<Object> bv = (Comparable<Object>) (columnView.getQuick(g[b]));
+ int r = av.compareTo(bv);
+ return r < 0 ? -1 : (r == 0 ? 0 : 1);
+ }
+ };
+
+ // compare splitter[a] with splitter[b]
+ IntComparator comp3 = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ Comparable<Object> av = (Comparable<Object>) (splitters[a]);
+ Comparable<Object> bv = (Comparable<Object>) (splitters[b]);
+ int r = av.compareTo(bv);
+ return r < 0 ? -1 : (r == 0 ? 0 : 1);
+ }
+ };
+
+ // generic partitioning does the main work of reordering row indexes
+ org.apache.mahout.math.Partitioning.genericPartition(
+ rowFrom, rowTo, splitFrom, splitTo, splitIndexes, comp, comp2, comp3, swapper);
+ }
+
+ /**
+ * Same as {@link org.apache.mahout.math.Partitioning#partition(int[],int,int,int[],int,int,int[])} except that it
+ * <i>synchronously</i> partitions the rows of the given matrix by the values of the given matrix column; This is
+ * essentially the same as partitioning a list of composite objects by some instance variable; In other words, two
+ * entire rows of the matrix are swapped, whenever two column values indicate so. <p> Let's say, a "row" is an
+ * "object" (tuple, d-dimensional point). A "column" is the list of "object" values of a given variable (field,
+ * dimension). A "matrix" is a list of "objects" (tuples, points). <p> Now, rows (objects, tuples) are partially
+ * sorted according to their values in one given variable (dimension). Two entire rows of the matrix are swapped,
+ * whenever two column values indicate so. <p> Note that arguments are not checked for validity. <p> <b>Example:</b>
+ * <table border="1" cellspacing="0"> <tr nowrap> <td valign="top"><tt>8 x 3 matrix:<br> 23, 22, 21<br> 20, 19, 18<br>
+ * 17, 16, 15<br> 14, 13, 12<br> 11, 10, 9<br> 8, 7, 6<br> 5, 4, 3<br> 2, 1, 0 </tt></td> <td align="left"
+ * valign="top"> <tt>column = 0;<br> splitters = {5,10,12}<br> partition(matrix,column,splitters,splitIndexes);<br>
+ * ==><br> splitIndexes == {0, 2, 3}</tt></p> </td> <td valign="top"> The matrix IS NOT REORDERED.<br> The new VIEW IS
+ * REORDERED:<br> <tt>8 x 3 matrix:<br> 2, 1, 0<br> 5, 4, 3<br> 8, 7, 6<br> 11, 10, 9<br> 23, 22, 21<br> 20, 19,
+ * 18<br> 17, 16, 15<br> 14, 13, 12 </tt></td> </tr> </table>
+ *
+ * @param matrix the matrix to be partitioned.
+ * @param column the index of the column to partition on.
+ * @param splitters the values at which the rows shall be split into intervals. Must be sorted ascending and must
+ * not contain multiple identical values. These preconditions are not checked; be sure that they
+ * are met.
+ * @param splitIndexes a list into which this method fills the indexes of rows delimiting intervals. Therefore, must
+ * satisfy <tt>splitIndexes.length >= splitters.length</tt>.
+ * @return a new matrix view having rows partitioned by the given column and splitters.
+ */
+ public static ObjectMatrix2D partition(ObjectMatrix2D matrix, int column, Object[] splitters, int[] splitIndexes) {
+ int rowTo = matrix.rows() - 1;
+ int splitTo = splitters.length - 1;
+ int[] rowIndexes = new int[matrix.rows()]; // row indexes to reorder instead of matrix itself
+ for (int i = rowIndexes.length; --i >= 0;) {
+ rowIndexes[i] = i;
+ }
+
+ int splitFrom = 0;
+ int rowFrom = 0;
+ partition(matrix, rowIndexes, rowFrom, rowTo, column, splitters, splitFrom, splitTo, splitIndexes);
+
+ // take all columns in the original order
+ int[] columnIndexes = new int[matrix.columns()];
+ for (int i = columnIndexes.length; --i >= 0;) {
+ columnIndexes[i] = i;
+ }
+
+ // view the matrix according to the reordered row indexes
+ return matrix.viewSelection(rowIndexes, columnIndexes);
+ }
+
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Partitioning.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Sorting.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Sorting.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Sorting.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Sorting.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,336 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose
+is hereby granted without fee, provided that the above copyright notice appear in all copies and
+that both that copyright notice and this permission notice appear in supporting documentation.
+CERN makes no representations about the suitability of this software for any purpose.
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.matrix.objectalgo;
+
+import org.apache.mahout.math.GenericSorting;
+import org.apache.mahout.math.PersistentObject;
+import org.apache.mahout.math.function.IntComparator;
+import org.apache.mahout.math.matrix.ObjectMatrix1D;
+import org.apache.mahout.math.matrix.ObjectMatrix2D;
+import org.apache.mahout.math.matrix.ObjectMatrix3D;
+import org.apache.mahout.math.matrix.impl.AbstractFormatter;
+
+import java.util.Comparator;
+/**
+ Matrix quicksorts and mergesorts.
+ Use idioms like <tt>Sorting.quickSort.sort(...)</tt> and <tt>Sorting.mergeSort.sort(...)</tt>.
+ <p>
+ This is another case demonstrating one primary goal of this library: Delivering easy to use, yet very efficient APIs.
+ The sorts return convenient <i>sort views</i>.
+ This enables the usage of algorithms which scale well with the problem size:
+ For example, sorting a 1000000 x 10000 or a 1000000 x 100 x 100 matrix performs just as fast as sorting a 1000000 x 1 matrix.
+ This is so, because internally the algorithms only move around integer indexes, they do not physically move around entire rows or slices.
+ The original matrix is left unaffected.
+ <p>
+ The quicksort is a derivative of the JDK 1.2 V1.26 algorithms (which are, in turn, based on Bentley's and McIlroy's fine work).
+ The mergesort is a derivative of the JAL algorithms, with optimisations taken from the JDK algorithms.
+ Mergesort is <i>stable</i> (by definition), while quicksort is not.
+ A stable sort is, for example, helpful, if matrices are sorted successively
+ by multiple columns. It preserves the relative position of equal elements.
+
+ @see org.apache.mahout.math.GenericSorting
+ @see org.apache.mahout.math.Sorting
+ @see java.util.Arrays
+
+ @author wolfgang.hoschek@cern.ch
+ @version 1.1, 25/May/2000
+ */
+
+/** @deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */
+@Deprecated
+public class Sorting extends PersistentObject {
+
+ /** A prefabricated quicksort. */
+ public static final Sorting quickSort = new Sorting(); // already has quicksort implemented
+
+ /** A prefabricated mergesort. */
+ public static final Sorting mergeSort = new Sorting() { // override quicksort with mergesort
+
+ @Override
+ protected void runSort(int[] a, int fromIndex, int toIndex, IntComparator c) {
+ org.apache.mahout.math.Sorting.mergeSort(a, fromIndex, toIndex, c);
+ }
+
+ @Override
+ protected void runSort(int fromIndex, int toIndex, IntComparator c, org.apache.mahout.math.Swapper swapper) {
+ GenericSorting.mergeSort(fromIndex, toIndex, c, swapper);
+ }
+ };
+
+ /** Makes this class non instantiable, but still let's others inherit from it. */
+ private Sorting() {
+ }
+
+ protected void runSort(int[] a, int fromIndex, int toIndex, IntComparator c) {
+ org.apache.mahout.math.Sorting.quickSort(a, fromIndex, toIndex, c);
+ }
+
+ protected void runSort(int fromIndex, int toIndex, IntComparator c, org.apache.mahout.math.Swapper swapper) {
+ GenericSorting.quickSort(fromIndex, toIndex, c, swapper);
+ }
+
+ /**
+ * Sorts the vector into ascending order, according to the <i>natural ordering</i>. The returned view is backed by
+ * this matrix, so changes in the returned view are reflected in this matrix, and vice-versa. To sort ranges use
+ * sub-ranging views. To sort descending, use flip views ... <p> <b>Example:</b> <table border="1" cellspacing="0">
+ * <tr nowrap> <td valign="top"><tt> 7, 1, 3, 1<br> </tt></td> <td valign="top"> <p><tt> ==> 1, 1, 3, 7<br> The
+ * vector IS NOT SORTED.<br> The new VIEW IS SORTED.</tt></p> </td> </tr> </table>
+ *
+ * @param vector the vector to be sorted.
+ * @return a new sorted vector (matrix) view. <b>Note that the original matrix is left unaffected.</b>
+ */
+ public ObjectMatrix1D sort(final ObjectMatrix1D vector) {
+ int[] indexes = new int[vector.size()]; // row indexes to reorder instead of matrix itself
+ for (int i = indexes.length; --i >= 0;) {
+ indexes[i] = i;
+ }
+
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ Comparable<Object> av = (Comparable<Object>) (vector.getQuick(a));
+ Comparable<Object> bv = (Comparable<Object>) (vector.getQuick(b));
+ int r = av.compareTo(bv);
+ return r < 0 ? -1 : (r > 0 ? 1 : 0);
+ }
+ };
+
+ runSort(indexes, 0, indexes.length, comp);
+
+ return vector.viewSelection(indexes);
+ }
+
+ /**
+ * Sorts the vector into ascending order, according to the order induced by the specified comparator. The returned
+ * view is backed by this matrix, so changes in the returned view are reflected in this matrix, and vice-versa. The
+ * algorithm compares two cells at a time, determinining whether one is smaller, equal or larger than the other. To
+ * sort ranges use sub-ranging views. To sort descending, use flip views ... <p> <b>Example:</b>
+ * <pre>
+ * // sort by sinus of cells
+ * ObjectComparator comp = new ObjectComparator() {
+ * public int compare(Object a, Object b) {
+ * Object as = Math.sin(a); Object bs = Math.sin(b);
+ * return as < bs ? -1 : as == bs ? 0 : 1;
+ * }
+ * };
+ * sorted = quickSort(vector,comp);
+ * </pre>
+ *
+ * @param vector the vector to be sorted.
+ * @param c the comparator to determine the order.
+ * @return a new matrix view sorted as specified. <b>Note that the original vector (matrix) is left unaffected.</b>
+ */
+ public ObjectMatrix1D sort(final ObjectMatrix1D vector, final Comparator<Object> c) {
+ int[] indexes = new int[vector.size()]; // row indexes to reorder instead of matrix itself
+ for (int i = indexes.length; --i >= 0;) {
+ indexes[i] = i;
+ }
+
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ return c.compare(vector.getQuick(a), vector.getQuick(b));
+ }
+ };
+
+ runSort(indexes, 0, indexes.length, comp);
+
+ return vector.viewSelection(indexes);
+ }
+
+ /**
+ * Sorts the matrix rows into ascending order, according to the <i>natural ordering</i> of the matrix values in the
+ * given column. The returned view is backed by this matrix, so changes in the returned view are reflected in this
+ * matrix, and vice-versa. To sort ranges use sub-ranging views. To sort columns by rows, use dice views. To sort
+ * descending, use flip views ... <p> <b>Example:</b> <table border="1" cellspacing="0"> <tr nowrap> <td
+ * valign="top"><tt>4 x 2 matrix: <br> 7, 6<br> 5, 4<br> 3, 2<br> 1, 0 <br> </tt></td> <td align="left" valign="top">
+ * <p><tt>column = 0;<br> view = quickSort(matrix,column);<br> log.info(view); </tt><tt><br> ==> </tt></p> </td> <td
+ * valign="top"> <p><tt>4 x 2 matrix:<br> 1, 0<br> 3, 2<br> 5, 4<br> 7, 6</tt><br> The matrix IS NOT SORTED.<br> The
+ * new VIEW IS SORTED.</p> </td> </tr> </table>
+ *
+ * @param matrix the matrix to be sorted.
+ * @param column the index of the column inducing the order.
+ * @return a new matrix view having rows sorted by the given column. <b>Note that the original matrix is left
+ * unaffected.</b>
+ * @throws IndexOutOfBoundsException if <tt>column < 0 || column >= matrix.columns()</tt>.
+ */
+ public ObjectMatrix2D sort(ObjectMatrix2D matrix, int column) {
+ if (column < 0 || column >= matrix.columns()) {
+ throw new IndexOutOfBoundsException("column=" + column + ", matrix=" + AbstractFormatter.shape(matrix));
+ }
+
+ int[] rowIndexes = new int[matrix.rows()]; // row indexes to reorder instead of matrix itself
+ for (int i = rowIndexes.length; --i >= 0;) {
+ rowIndexes[i] = i;
+ }
+
+ final ObjectMatrix1D col = matrix.viewColumn(column);
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ Comparable<Object> av = (Comparable<Object>) (col.getQuick(a));
+ Comparable<Object> bv = (Comparable<Object>) (col.getQuick(b));
+ int r = av.compareTo(bv);
+ return r < 0 ? -1 : (r > 0 ? 1 : 0);
+ }
+ };
+
+ runSort(rowIndexes, 0, rowIndexes.length, comp);
+
+ // view the matrix according to the reordered row indexes
+ // take all columns in the original order
+ return matrix.viewSelection(rowIndexes, null);
+ }
+
+ /**
+ * Sorts the matrix rows according to the order induced by the specified comparator. The returned view is backed by
+ * this matrix, so changes in the returned view are reflected in this matrix, and vice-versa. The algorithm compares
+ * two rows (1-d matrices) at a time, determinining whether one is smaller, equal or larger than the other. To sort
+ * ranges use sub-ranging views. To sort columns by rows, use dice views. To sort descending, use flip views ... <p>
+ * <b>Example:</b>
+ * <pre>
+ * // sort by sum of values in a row
+ * ObjectMatrix1DComparator comp = new ObjectMatrix1DComparator() {
+ * public int compare(ObjectMatrix1D a, ObjectMatrix1D b) {
+ * Object as = a.zSum(); Object bs = b.zSum();
+ * return as < bs ? -1 : as == bs ? 0 : 1;
+ * }
+ * };
+ * sorted = quickSort(matrix,comp);
+ * </pre>
+ *
+ * @param matrix the matrix to be sorted.
+ * @param c the comparator to determine the order.
+ * @return a new matrix view having rows sorted as specified. <b>Note that the original matrix is left
+ * unaffected.</b>
+ */
+ public ObjectMatrix2D sort(ObjectMatrix2D matrix, final ObjectMatrix1DComparator c) {
+ int[] rowIndexes = new int[matrix.rows()]; // row indexes to reorder instead of matrix itself
+ for (int i = rowIndexes.length; --i >= 0;) {
+ rowIndexes[i] = i;
+ }
+
+ final ObjectMatrix1D[] views = new ObjectMatrix1D[matrix.rows()]; // precompute views for speed
+ for (int i = views.length; --i >= 0;) {
+ views[i] = matrix.viewRow(i);
+ }
+
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ //return c.compare(matrix.viewRow(a), matrix.viewRow(b));
+ return c.compare(views[a], views[b]);
+ }
+ };
+
+ runSort(rowIndexes, 0, rowIndexes.length, comp);
+
+ // view the matrix according to the reordered row indexes
+ // take all columns in the original order
+ return matrix.viewSelection(rowIndexes, null);
+ }
+
+ /**
+ * Sorts the matrix slices into ascending order, according to the <i>natural ordering</i> of the matrix values in the
+ * given <tt>[row,column]</tt> position. The returned view is backed by this matrix, so changes in the returned view
+ * are reflected in this matrix, and vice-versa. To sort ranges use sub-ranging views. To sort by other dimensions,
+ * use dice views. To sort descending, use flip views ... <p> The algorithm compares two 2-d slices at a time,
+ * determinining whether one is smaller, equal or larger than the other. Comparison is based on the cell
+ * <tt>[row,column]</tt> within a slice. Let <tt>A</tt> and <tt>B</tt> be two 2-d slices. Then we have the following
+ * rules <ul> <li><tt>A < B iff A.get(row,column) < B.get(row,column)</tt> <li><tt>A == B iff
+ * A.get(row,column) == B.get(row,column)</tt> <li><tt>A > B iff A.get(row,column) > B.get(row,column)</tt>
+ * </ul>
+ *
+ * @param matrix the matrix to be sorted.
+ * @param row the index of the row inducing the order.
+ * @param column the index of the column inducing the order.
+ * @return a new matrix view having slices sorted by the values of the slice view <tt>matrix.viewRow(row).viewColumn(column)</tt>.
+ * <b>Note that the original matrix is left unaffected.</b>
+ * @throws IndexOutOfBoundsException if <tt>row < 0 || row >= matrix.rows() || column < 0 || column >=
+ * matrix.columns()</tt>.
+ */
+ public ObjectMatrix3D sort(ObjectMatrix3D matrix, int row, int column) {
+ if (row < 0 || row >= matrix.rows()) {
+ throw new IndexOutOfBoundsException("row=" + row + ", matrix=" + AbstractFormatter.shape(matrix));
+ }
+ if (column < 0 || column >= matrix.columns()) {
+ throw new IndexOutOfBoundsException("column=" + column + ", matrix=" + AbstractFormatter.shape(matrix));
+ }
+
+ int[] sliceIndexes = new int[matrix.slices()]; // indexes to reorder instead of matrix itself
+ for (int i = sliceIndexes.length; --i >= 0;) {
+ sliceIndexes[i] = i;
+ }
+
+ final ObjectMatrix1D sliceView = matrix.viewRow(row).viewColumn(column);
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ Comparable<Object> av = (Comparable<Object>) (sliceView.getQuick(a));
+ Comparable<Object> bv = (Comparable<Object>) (sliceView.getQuick(b));
+ int r = av.compareTo(bv);
+ return r < 0 ? -1 : (r > 0 ? 1 : 0);
+ }
+ };
+
+ runSort(sliceIndexes, 0, sliceIndexes.length, comp);
+
+ // view the matrix according to the reordered slice indexes
+ // take all rows and columns in the original order
+ return matrix.viewSelection(sliceIndexes, null, null);
+ }
+
+ /**
+ * Sorts the matrix slices according to the order induced by the specified comparator. The returned view is backed by
+ * this matrix, so changes in the returned view are reflected in this matrix, and vice-versa. The algorithm compares
+ * two slices (2-d matrices) at a time, determinining whether one is smaller, equal or larger than the other. To sort
+ * ranges use sub-ranging views. To sort by other dimensions, use dice views. To sort descending, use flip views ...
+ * <p> <b>Example:</b>
+ * <pre>
+ * // sort by sum of values in a slice
+ * ObjectMatrix2DComparator comp = new ObjectMatrix2DComparator() {
+ * public int compare(ObjectMatrix2D a, ObjectMatrix2D b) {
+ * Object as = a.zSum(); Object bs = b.zSum();
+ * return as < bs ? -1 : as == bs ? 0 : 1;
+ * }
+ * };
+ * sorted = quickSort(matrix,comp);
+ * </pre>
+ *
+ * @param matrix the matrix to be sorted.
+ * @param c the comparator to determine the order.
+ * @return a new matrix view having slices sorted as specified. <b>Note that the original matrix is left
+ * unaffected.</b>
+ */
+ public ObjectMatrix3D sort(ObjectMatrix3D matrix, final ObjectMatrix2DComparator c) {
+ int[] sliceIndexes = new int[matrix.slices()]; // indexes to reorder instead of matrix itself
+ for (int i = sliceIndexes.length; --i >= 0;) {
+ sliceIndexes[i] = i;
+ }
+
+ final ObjectMatrix2D[] views = new ObjectMatrix2D[matrix.slices()]; // precompute views for speed
+ for (int i = views.length; --i >= 0;) {
+ views[i] = matrix.viewSlice(i);
+ }
+
+ IntComparator comp = new IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ //return c.compare(matrix.viewSlice(a), matrix.viewSlice(b));
+ return c.compare(views[a], views[b]);
+ }
+ };
+
+ runSort(sliceIndexes, 0, sliceIndexes.length, comp);
+
+ // view the matrix according to the reordered slice indexes
+ // take all rows and columns in the original order
+ return matrix.viewSelection(sliceIndexes, null, null);
+ }
+}
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/Sorting.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/package.html?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/package.html (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/package.html Thu Dec 17 23:22:16 2009
@@ -0,0 +1,5 @@
+<HTML>
+<BODY>
+Object matrix <i>algorithms</i> such as print formatting, sorting, partitioning and statistics.
+</BODY>
+</HTML>
\ No newline at end of file
Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/objectalgo/package.html
------------------------------------------------------------------------------
svn:eol-style = native