You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by im281 <im...@gmail.com> on 2016/11/08 03:50:05 UTC

TallSkinnyQR

I am getting the correct rows but they are out of order. Is this a bug or am
I doing something wrong?




public class CoordinateMatrixDemo {

	public static void main(String[] args) {

		//boiler plate needed to run locally  
		SparkConf conf = new SparkConf().setAppName("Simple
Application").setMaster("local");
		JavaSparkContext sc = new JavaSparkContext(conf);

		SparkSession spark = SparkSession
				.builder()
				.appName("CoordinateMatrix")
				.getOrCreate()
				.newSession();

		run(sc,"Data/sparsematrix.txt");
	}


	private static void run(JavaSparkContext sc, String file) {

		//Read coordinate matrix from text or database
		JavaRDD<String> fileA = sc.textFile(file);

		//map text file with coordinate data (sparse matrix) to
JavaRDD<MatrixEntry>
		JavaRDD<MatrixEntry> matrixA = fileA.map(new Function<String,
MatrixEntry>() {
			public MatrixEntry call(String x){
				String[] indeceValue = x.split(",");
				long i = Long.parseLong(indeceValue[0]);
				long j = Long.parseLong(indeceValue[1]);
				double value = Double.parseDouble(indeceValue[2]);
				return new MatrixEntry(i, j, value );
			}
		});

		//coordinate matrix from sparse data
		CoordinateMatrix cooMatrixA = new CoordinateMatrix(matrixA.rdd());

		//create block matrix
		BlockMatrix matA = cooMatrixA.toBlockMatrix();

		//create block matrix after matrix multiplication (square matrix)
		BlockMatrix ata = matA.transpose().multiply(matA);

		//print out the original dense matrix
		System.out.println(matA.toLocalMatrix().toString());

		//print out the transpose of the dense matrix
		System.out.println(matA.transpose().toLocalMatrix().toString());

		//print out the square matrix (after multiplication)
		System.out.println(ata.toLocalMatrix().toString());

		JavaRDD<MatrixEntry> entries =
ata.toCoordinateMatrix().entries().toJavaRDD();



		//QR decomposition DEMO
		// Convert it to an IndexRowMatrix whose rows are sparse vectors.
		IndexedRowMatrix indexedRowMatrix = cooMatrixA.toIndexedRowMatrix();

		// Drop its row indices.
		RowMatrix rowMat = indexedRowMatrix.toRowMatrix();

		// QR decomposition 
		*QRDecomposition<RowMatrix, Matrix> result = rowMat.tallSkinnyQR(true);*

		*System.out.println("Q: " + result.Q().toBreeze().toString());*
		System.out.println("R: " + result.R().toString());
		
   
		Vector[] collectPartitions = (Vector[]) result.Q().rows().collect();
		
		System.out.println("Q factor is:");
	    for (Vector vector : collectPartitions) {
	      System.out.println("\t" + vector);
	    }
		
		
		
		
		
		
		//compute Qt
		//need to compute d = Qt*b where b is the experimental
		//Then solve for d using Gaussian elimination    

		//Extract Q values and create matrix
		//TODO:! The array will be HUGE
		String Qm = result.Q().toBreeze().toString();
		String[] Qmatrix = Qm.split("\\s+");
		
		int rows = (int)result.Q().numRows();
		int cols = (int)result.Q().numCols();
		
		try {
			PrintWriter pw = new PrintWriter("Data/qMatrix.txt");
			pw.write(Qm);
			pw.close();
			
			PrintWriter pw1 = new PrintWriter("Data/qMatrix1.txt");
			//write coordinate matrix to file
			int k = 0;
			for(int i = 0; i < (int)result.Q().numRows();i++){
				for(int j = 0; j < (int)result.Q().numCols();j++){
					pw1.println(i + "," + j + "," + Qmatrix[k]);
					k++;
				}
			}
			pw1.close();

		} catch (FileNotFoundException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		
		//Read coordinate matrix from text or database
		JavaRDD<String> fileQ = sc.textFile("Data/qMatrix1.txt");

		//somehow map with coordinate data (sparse matrix) to JavaRDD<MatrixEntry>
		JavaRDD<MatrixEntry> matrixQ = fileQ.map(new Function<String,
MatrixEntry>() {
			long row = 0;
			long col = 0;
			public MatrixEntry call(String x){
				String[] indeceValue = x.split(",");	
				long i = Long.parseLong(indeceValue[0]);
				long j = Long.parseLong(indeceValue[1]);
				double value = Double.parseDouble(indeceValue[2]);
				return new MatrixEntry(i, j, value );
			}
		});

		//coordinate matrix from sparse data
		CoordinateMatrix cooMatrixQ = new CoordinateMatrix(matrixQ.rdd());

		//create block matrix
		BlockMatrix matQ = cooMatrixQ.toBlockMatrix();
		
		//create block matrix after matrix multiplication (square matrix)
		BlockMatrix Qt = matQ.transpose();

		//print out the original dense matrix
		System.out.println("Matrix Q!!!!   " +  matQ.toLocalMatrix().toString());


		//create vector from coordinate matrix (text or database)
		//Read coordinate matrix from text or database
		JavaRDD<String> fileB = sc.textFile("Data/sparsevector.txt");

		//map text file with coordinate data (sparse matrix) to
JavaRDD<MatrixEntry>
		JavaRDD<MatrixEntry> matrixV = fileB.map(new Function<String,
MatrixEntry>() {
			public MatrixEntry call(String x){
				String[] indeceValue = x.split(",");
				long i = Long.parseLong(indeceValue[0]);
				long j = Long.parseLong(indeceValue[1]);
				double value = Double.parseDouble(indeceValue[2]);
				return new MatrixEntry(i, j, value );
			}
		});

		//coordinate matrix from sparse data
		CoordinateMatrix cooMatrixV = new CoordinateMatrix(matrixV.rdd());

		//create block matrix
		BlockMatrix matB = cooMatrixV.toBlockMatrix();
		
		BlockMatrix matD = Qt.multiply(matB);

		//we have A and b (Can solve Ax = b now)
		//b is just a m x 1 matrix
		Matrix v = matB.toLocalMatrix();	

		System.out.println(v);

		DenseMatrix r = (DenseMatrix) result.R();

		System.out.println(r);

		double[] A = r.toArray();    
		double[] b = v.toArray();
		double[] d = matD.toLocalMatrix().toArray();
		double[][] m = new double[r.numRows()][r.numCols()];

		int k = 0;
		for(int i = 0; i < r.numRows();i++){
			for(int j = 0;j < r.numCols();j++ ){
				m[j][i] = A[k];
				k++;
			}
		}
		
		GaussianElimination ge = new GaussianElimination();
		double[] resultV =ge.lsolve(m, d);
		for(int i = 0; i < 3;i++){
			System.out.println(resultV[i]);
		}
			


		//NONE of these commands work!
		//JavaRDD<String> output = entries.map(new Function<MatrixEntry, String>()
{
		//    public String call(MatrixEntry e) {
		//        return String.format("%d,%d,%s", e.i(), e.j(), e.value());
		//    }
		//});

		//entries.saveAsTextFile("Data/output1");
		//output.saveAsTextFile("Data/output1");
	
//ata.toCoordinateMatrix().entries().toJavaRDD().saveAsTextFile("Data/Output1");	    

	}		
}









--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/TallSkinnyQR-tp28039.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe e-mail: user-unsubscribe@spark.apache.org


Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
So
 b =
0.89
0.42
0.0
0.88
0.97
The solution at the bottom is the solution to Ax = b solved using Gaussian
elimination. I guess another question is, is there another way to solve
this problem? I'm trying to solve the least squares fit with a huge A (5MM
x 1MM)

x = inverse(A-transpose*A)*A-transpose*b

but I didn't see any functions for matrix inversion

I suppose I could use an iterative solver, but I didn't see one either, which
is why I chose the QR decomposition: compute Q and R, form d = Q-transpose*b,
and then solve Rx = d, which would give the solution. But I don't think
this would work either, since the matrices are local copies and not RDD data
structures. Any advice would be appreciated...
Iman

P.S. I also looked at the linear regression class in MLlib, but I haven't
seen any examples that take a sparse matrix and sparse vectors as input, just
'Dataset'. If you have a code example of this, would it work?


On Tue, Nov 8, 2016 at 6:41 AM Iman Mohtashemi <im...@gmail.com>
wrote:

> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Hi guys,
Here is another problem I encountered using the tallskinny QR. I've
attached some clear documentation of the problem. I posted it on the forum
but I'm not sure if it went through
Best regards,
Iman

On Fri, Dec 30, 2016 at 9:22 AM Sean Owen <so...@cloudera.com> wrote:

> There are no changes to Spark at all here. See my workaround below.
>
> On Fri, Dec 30, 2016, 17:18 Iman Mohtashemi <im...@gmail.com>
> wrote:
>
>> Hi guys,
>> Are your changes/bug fixes reflected in the Spark 2.1 release?
>> Iman
>>
>> On Dec 2, 2016 3:03 PM, "Iman Mohtashemi" <im...@gmail.com>
>> wrote:
>>
>> Thanks again! This is very helpful!
>> Best regards,
>> Iman
>>
>> On Dec 2, 2016 2:49 PM, "Huamin Li" <3e...@gmail.com> wrote:
>>
>> Hi Iman,
>>
>> You can get my code from https://github.com/hl475/svd/tree/testSVD. In
>> addition to fixing the index issue for IndexedRowMatrix (
>> https://issues.apache.org/jira/browse/SPARK-8614), I have made the
>> following changes as well:
>>
>> (1) Add tallSkinnySVD and computeSVDbyGram to indexedRowMatrix.
>> (2) Add shuffle.scala to
>> mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/ (you need this if you want to use
>> tallSkinnySVD). There was a bug about shuffle method in breeze, and I sent
>> the pull request to https://github.com/scalanlp/breeze/pull/571.
>> However, the pull request has been merged to breeze 0.13, whereas the
>> version of breeze for current Spark is 0.12.
>> (3) Add partialSVD to BlockMatrix which computes the randomized singular
>> value decomposition of a given BlockMatrix.
>>
>> The new SVD methods (tallSkinnySVD, computeSVDbyGram, and partialSVD) are
>> in beta version right now. You are totally welcome to test it and share the
>> feedback with me!
>>
>> I implemented these codes for my summer intern project with Mark Tygert,
>> and we are currently testing the performance of the new codes.
>>
>> Best,
>> Huamin
>>
>> On Fri, Dec 2, 2016 at 2:07 PM, Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>> Great thanks! Where can I get the latest with the bug fixes?
>> best regards,
>> Iman
>>
>> On Fri, Dec 2, 2016 at 10:54 AM Huamin Li <3e...@gmail.com> wrote:
>>
>> Hi,
>>
>> There seems to be a bug in the section of code that converts the
>> RowMatrix format back into indexedRowMatrix format.
>>
>> For RowMatrix, I think the singular values and right singular vectors
>> (not the left singular vectors U) that computeSVD computes are correct when
>> using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
>> is correct when using multiple executors/machines. U and Q were being
>> stored in RowMatrix format. There is no index information about RowMatrix,
>> so it does not make sense for U and Q.
>>
>> Others have run into this same problem (
>> https://issues.apache.org/jira/browse/SPARK-8614)
>>
>> I think the quick solution for this problem is copy and paste the multiply,
>> computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix and
>> make the corresponding changes although this would result in code
>> duplication.
>>
>> I have fixed the problem by what I mentioned above. Now, multiply,
>> computeSVD, and tallSkinnyQR are giving the correct results for
>> indexedRowMatrix when using multiple executors or workers. Let me know
>> if I should do a pull request for this.
>>
>> Best,
>> Huamin
>>
>> On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>> Ok thanks.
>>
>> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>>
>> I tried, but enforcing the ordering changed a fair bit of behavior and I
>> gave up. I think the way to think of it is: a RowMatrix has whatever
>> ordering you made it with, so you need to give it ordered rows if you're
>> going to use a method like the QR decomposition. That works. I don't think
>> the QR method should ever have been on this class though, for this reason.
>>
>> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
>> wrote:
>>
>> Hi guys,
>> Was this bug ever resolved?
>> Iman
>>
>> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>> Yes this would be helpful, otherwise the Q part of the decomposition is
>> useless. One can use that to solve the system by transposing it and
>> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
>> since the Upper triangular matrix is correctly available (R)
>>
>> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>>
>> @Xiangrui / @Joseph, do you think it would be reasonable to have
>> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
>> order to make the ultimate output of toRowMatrix less surprising when it's
>> not ordered?
>>
>>
>> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
>> result in a RowMatrix with rows in order of their indices, necessarily:
>>
>>
>> // Drop its row indices.
>> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>>
>> What you get is a matrix where the rows are arranged in whatever order
>> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
>> the ordering doesn't matter, but then it's maybe surprising it has a QR
>> decomposition method, because clearly the result depends on the order of
>> rows in the input. (CC Yuhao Yang for a comment?)
>>
>> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
>> least something with sorted rows? that would not be hard. It also won't
>> return "missing" rows (all zeroes), so it would not in any event result in
>> a RowMatrix whose implicit rows and ordering represented the same matrix.
>> That, at least, strikes me as something to be better documented.
>>
>> Maybe it would be nicer still to at least sort the rows, given the
>> existence of use cases like yours. For example, at least
>> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>>
>> In any event you should be able to make it work by manually getting the
>> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
>> to Vectors and making a RowMatrix from it.
>>
>>
>>
>> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
>> wrote:
>>
>> Hi Sean,
>> Here you go:
>>
>> sparsematrix.txt =
>>
>> row, col ,val
>> 0,0,.42
>> 0,1,.28
>> 0,2,.89
>> 1,0,.83
>> 1,1,.34
>> 1,2,.42
>> 2,0,.23
>> 3,0,.42
>> 3,1,.98
>> 3,2,.88
>> 4,0,.23
>> 4,1,.36
>> 4,2,.97
>>
>> The vector is just the third column of the matrix which should give the
>> trivial solution of [0,0,1]
>>
>> This translates to this which is correct
>> There are zeros in the matrix (Not really sparse but just an example)
>> 0.42  0.28  0.89
>> 0.83  0.34  0.42
>> 0.23  0.0   0.0
>> 0.42  0.98  0.88
>> 0.23  0.36  0.97
>>
>>
>> Here is what I get for  the Q and R
>>
>> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
>> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
>> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
>> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
>> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
>> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
>> 0.0                  0.7662808691141717   0.7553315911660984
>> 0.0                  0.0                  0.7785210939368136
>>
>> When running this in matlab the numbers are the same but row 1 is the
>> last row and the last row is interchanged with row 3
>>
>>
>>
>> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> Rather than post a large section of code, please post a small example of
>> the input matrix and its decomposition, to illustrate what you're saying is
>> out of order.
>>
>> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>>
>> I am getting the correct rows but they are out of order. Is this a bug or
>> am
>> I doing something wrong?
>>
>>
>>
>>
>>

Re: TallSkinnyQR

Posted by Sean Owen <so...@cloudera.com>.
There are no changes to Spark at all here. See my workaround below.

On Fri, Dec 30, 2016, 17:18 Iman Mohtashemi <im...@gmail.com>
wrote:

> Hi guys,
> Are your changes/bug fixes reflected in the Spark 2.1 release?
> Iman
>
> On Dec 2, 2016 3:03 PM, "Iman Mohtashemi" <im...@gmail.com>
> wrote:
>
> Thanks again! This is very helpful!
> Best regards,
> Iman
>
> On Dec 2, 2016 2:49 PM, "Huamin Li" <3e...@gmail.com> wrote:
>
> Hi Iman,
>
> You can get my code from https://github.com/hl475/svd/tree/testSVD. In
> addition to fixing the index issue for IndexedRowMatrix (
> https://issues.apache.org/jira/browse/SPARK-8614), I have made the
> following changes as well:
>
> (1) Add tallSkinnySVD and computeSVDbyGram to indexedRowMatrix.
> (2) Add shuffle.scala to
> mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/ (you need this if you want to use
> tallSkinnySVD). There was a bug about shuffle method in breeze, and I sent
> the pull request to https://github.com/scalanlp/breeze/pull/571. However,
> the pull request has been merged to breeze 0.13, whereas the version of
> breeze for current Spark is 0.12.
> (3) Add partialSVD to BlockMatrix which computes the randomized singular
> value decomposition of a given BlockMatrix.
>
> The new SVD methods (tallSkinnySVD, computeSVDbyGram, and partialSVD) are
> in beta version right now. You are totally welcome to test it and share the
> feedback with me!
>
> I implemented these codes for my summer intern project with Mark Tygert,
> and we are currently testing the performance of the new codes.
>
> Best,
> Huamin
>
> On Fri, Dec 2, 2016 at 2:07 PM, Iman Mohtashemi <iman.mohtashemi@gmail.com
> > wrote:
>
> Great thanks! Where can I get the latest with the bug fixes?
> best regards,
> Iman
>
> On Fri, Dec 2, 2016 at 10:54 AM Huamin Li <3e...@gmail.com> wrote:
>
> Hi,
>
> There seems to be a bug in the section of code that converts the RowMatrix
> format back into indexedRowMatrix format.
>
> For RowMatrix, I think the singular values and right singular vectors
> (not the left singular vectors U) that computeSVD computes are correct when
> using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
> is correct when using multiple executors/machines. U and Q were being
> stored in RowMatrix format. There is no index information about RowMatrix,
> so it does not make sense for U and Q.
>
> Others have run into this same problem (
> https://issues.apache.org/jira/browse/SPARK-8614)
>
> I think the quick solution for this problem is copy and paste the multiply,
> computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix and
> make the corresponding changes although this would result in code
> duplication.
>
> I have fixed the problem by what I mentioned above. Now, multiply,
> computeSVD, and tallSkinnyQR are giving the correct results for
> indexedRowMatrix when using multiple executors or workers. Let me know if
> I should do a pull request for this.
>
> Best,
> Huamin
>
> On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <
> iman.mohtashemi@gmail.com> wrote:
>
> Ok thanks.
>
> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>
> I tried, but enforcing the ordering changed a fair bit of behavior and I
> gave up. I think the way to think of it is: a RowMatrix has whatever
> ordering you made it with, so you need to give it ordered rows if you're
> going to use a method like the QR decomposition. That works. I don't think
> the QR method should ever have been on this class though, for this reason.
>
> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi guys,
> Was this bug ever resolved?
> Iman
>
> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Yes this would be helpful, otherwise the Q part of the decomposition is
> useless. One can use that to solve the system by transposing it and
> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
> since the Upper triangular matrix is correctly available (R)
>
> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>
> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Hi guys,
Are your changes/bug fixes reflected in the Spark 2.1 release?
Iman

On Dec 2, 2016 3:03 PM, "Iman Mohtashemi" <im...@gmail.com> wrote:

> Thanks again! This is very helpful!
> Best regards,
> Iman
>
> On Dec 2, 2016 2:49 PM, "Huamin Li" <3e...@gmail.com> wrote:
>
>> Hi Iman,
>>
>> You can get my code from https://github.com/hl475/svd/tree/testSVD. In
>> addition to fixing the index issue for IndexedRowMatrix (
>> https://issues.apache.org/jira/browse/SPARK-8614), I have made the
>> following changes as well:
>>
>> (1) Add tallSkinnySVD and computeSVDbyGram to indexedRowMatrix.
>> (2) Add shuffle.scala to
>> mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/ (you need this if you want to use
>> tallSkinnySVD). There was a bug about shuffle method in breeze, and I sent
>> the pull request to https://github.com/scalanlp/breeze/pull/571.
>> However, the pull request has been merged to breeze 0.13, whereas the
>> version of breeze for current Spark is 0.12.
>> (3) Add partialSVD to BlockMatrix which computes the randomized singular
>> value decomposition of a given BlockMatrix.
>>
>> The new SVD methods (tallSkinnySVD, computeSVDbyGram, and partialSVD) are
>> in beta version right now. You are totally welcome to test it and share the
>> feedback with me!
>>
>> I implemented these codes for my summer intern project with Mark Tygert,
>> and we are currently testing the performance of the new codes.
>>
>> Best,
>> Huamin
>>
>> On Fri, Dec 2, 2016 at 2:07 PM, Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>>> Great thanks! Where can I get the latest with the bug fixes?
>>> best regards,
>>> Iman
>>>
>>> On Fri, Dec 2, 2016 at 10:54 AM Huamin Li <3e...@gmail.com> wrote:
>>>
>>>> Hi,
>>>>
>>>> There seems to be a bug in the section of code that converts the
>>>> RowMatrix format back into indexedRowMatrix format.
>>>>
>>>> For RowMatrix, I think the singular values and right singular vectors
>>>> (not the left singular vectors U) that computeSVD computes are correct when
>>>> using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
>>>> is correct when using multiple executors/machines. U and Q were being
>>>> stored in RowMatrix format. There is no index information about RowMatrix,
>>>> so it does not make sense for U and Q.
>>>>
>>>> Others have run into this same problem (
>>>> https://issues.apache.org/jira/browse/SPARK-8614)
>>>>
>>>> I think the quick solution for this problem is copy and paste the multiply,
>>>> computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix
>>>> and make the corresponding changes although this would result in code
>>>> duplication.
>>>>
>>>> I have fixed the problem by what I mentioned above. Now, multiply,
>>>> computeSVD, and tallSkinnyQR are giving the correct results for
>>>> indexedRowMatrix when using multiple executors or workers. Let me know
>>>> if I should do a pull request for this.
>>>>
>>>> Best,
>>>> Huamin
>>>>
>>>> On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <
>>>> iman.mohtashemi@gmail.com> wrote:
>>>>
>>>> Ok thanks.
>>>>
>>>> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>>>>
>>>> I tried, but enforcing the ordering changed a fair bit of behavior and
>>>> I gave up. I think the way to think of it is: a RowMatrix has whatever
>>>> ordering you made it with, so you need to give it ordered rows if you're
>>>> going to use a method like the QR decomposition. That works. I don't think
>>>> the QR method should ever have been on this class though, for this reason.
>>>>
>>>> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <
>>>> iman.mohtashemi@gmail.com> wrote:
>>>>
>>>> Hi guys,
>>>> Was this bug ever resolved?
>>>> Iman
>>>>
>>>> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <
>>>> iman.mohtashemi@gmail.com> wrote:
>>>>
>>>> Yes this would be helpful, otherwise the Q part of the decomposition is
>>>> useless. One can use that to solve the system by transposing it and
>>>> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
>>>> since the Upper triangular matrix is correctly available (R)
>>>>
>>>> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>>>>
>>>> @Xiangrui / @Joseph, do you think it would be reasonable to have
>>>> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
>>>> order to make the ultimate output of toRowMatrix less surprising when it's
>>>> not ordered?
>>>>
>>>>
>>>> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>>>>
>>>> I think the problem here is that IndexedRowMatrix.toRowMatrix does
>>>> *not* result in a RowMatrix with rows in order of their indices,
>>>> necessarily:
>>>>
>>>>
>>>> // Drop its row indices.
>>>> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>>>>
>>>> What you get is a matrix where the rows are arranged in whatever order
>>>> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
>>>> the ordering doesn't matter, but then it's maybe surprising it has a QR
>>>> decomposition method, because clearly the result depends on the order of
>>>> rows in the input. (CC Yuhao Yang for a comment?)
>>>>
>>>> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
>>>> least something with sorted rows? that would not be hard. It also won't
>>>> return "missing" rows (all zeroes), so it would not in any event result in
>>>> a RowMatrix whose implicit rows and ordering represented the same matrix.
>>>> That, at least, strikes me as something to be better documented.
>>>>
>>>> Maybe it would be nicer still to at least sort the rows, given the
>>>> existence of use cases like yours. For example, at least
>>>> CoordinateMatrix.toIndexedRowMatrix could sort? that is less
>>>> surprising.
>>>>
>>>> In any event you should be able to make it work by manually getting the
>>>> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
>>>> to Vectors and making a RowMatrix from it.
>>>>
>>>>
>>>>
>>>> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <
>>>> iman.mohtashemi@gmail.com> wrote:
>>>>
>>>> Hi Sean,
>>>> Here you go:
>>>>
>>>> sparsematrix.txt =
>>>>
>>>> row, col ,val
>>>> 0,0,.42
>>>> 0,1,.28
>>>> 0,2,.89
>>>> 1,0,.83
>>>> 1,1,.34
>>>> 1,2,.42
>>>> 2,0,.23
>>>> 3,0,.42
>>>> 3,1,.98
>>>> 3,2,.88
>>>> 4,0,.23
>>>> 4,1,.36
>>>> 4,2,.97
>>>>
>>>> The vector is just the third column of the matrix which should give the
>>>> trivial solution of [0,0,1]
>>>>
>>>> This translates to this which is correct
>>>> There are zeros in the matrix (Not really sparse but just an example)
>>>> 0.42  0.28  0.89
>>>> 0.83  0.34  0.42
>>>> 0.23  0.0   0.0
>>>> 0.42  0.98  0.88
>>>> 0.23  0.36  0.97
>>>>
>>>>
>>>> Here is what I get for  the Q and R
>>>>
>>>> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
>>>> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
>>>> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
>>>> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
>>>> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
>>>> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
>>>> 0.0                  0.7662808691141717   0.7553315911660984
>>>> 0.0                  0.0                  0.7785210939368136
>>>>
>>>> When running this in matlab the numbers are the same but row 1 is the
>>>> last row and the last row is interchanged with row 3
>>>>
>>>>
>>>>
>>>> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>>>>
>>>> Rather than post a large section of code, please post a small example
>>>> of the input matrix and its decomposition, to illustrate what you're saying
>>>> is out of order.
>>>>
>>>> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>>>>
>>>> I am getting the correct rows but they are out of order. Is this a bug
>>>> or am
>>>> I doing something wrong?
>>>>
>>>>
>>>>
>>>>
>>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Thanks again! This is very helpful!
Best regards,
Iman

On Dec 2, 2016 2:49 PM, "Huamin Li" <3e...@gmail.com> wrote:

> Hi Iman,
>
> You can get my code from https://github.com/hl475/svd/tree/testSVD. In
> addition to fixing the index issue for IndexedRowMatrix (
> https://issues.apache.org/jira/browse/SPARK-8614), I have made the
> following changes as well:
>
> (1) Add tallSkinnySVD and computeSVDbyGram to indexedRowMatrix.
> (2) Add shuffle.scala to
> mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/ (you need
> this if you want to use
> tallSkinnySVD). There was a bug about shuffle method in breeze, and I sent
> the pull request to https://github.com/scalanlp/breeze/pull/571. However,
> the pull request has been merged to breeze 0.13, whereas the version of
> breeze for current Spark is 0.12.
> (3) Add partialSVD to BlockMatrix which computes the randomized singular
> value decomposition of a given BlockMatrix.
>
> The new SVD methods (tallSkinnySVD, computeSVDbyGram, and partialSVD) are
> in beta version right now. You are totally welcome to test it and share the
> feedback with me!
>
> I implemented these codes for my summer intern project with Mark Tygert,
> and we are currently testing the performance of the new codes.
>
> Best,
> Huamin
>
> On Fri, Dec 2, 2016 at 2:07 PM, Iman Mohtashemi <iman.mohtashemi@gmail.com
> > wrote:
>
>> Great thanks! Where can I get the latest with the bug fixes?
>> best regards,
>> Iman
>>
>> On Fri, Dec 2, 2016 at 10:54 AM Huamin Li <3e...@gmail.com> wrote:
>>
>>> Hi,
>>>
>>> There seems to be a bug in the section of code that converts the
>>> RowMatrix format back into indexedRowMatrix format.
>>>
>>> For RowMatrix, I think the singular values and right singular vectors
>>> (not the left singular vectors U) that computeSVD computes are correct when
>>> using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
>>> is correct when using multiple executors/machines. U and Q were being
>>> stored in RowMatrix format. There is no index information about RowMatrix,
>>> so it does not make sense for U and Q.
>>>
>>> Others have run into this same problem
>>> (https://issues.apache.org/jira/browse/SPARK-8614)
>>>
>>> I think the quick solution for this problem is copy and paste the multiply,
>>> computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix
>>> and make the corresponding changes although this would result in code
>>> duplication.
>>>
>>> I have fixed the problem by what I mentioned above. Now, multiply,
>>> computeSVD, and tallSkinnyQR are giving the correct results for
>>> indexedRowMatrix when using multiple executors or workers. Let me know
>>> if I should do a pull request for this.
>>>
>>> Best,
>>> Huamin
>>>
>>> On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <
>>> iman.mohtashemi@gmail.com> wrote:
>>>
>>> Ok thanks.
>>>
>>> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>>>
>>> I tried, but enforcing the ordering changed a fair bit of behavior and I
>>> gave up. I think the way to think of it is: a RowMatrix has whatever
>>> ordering you made it with, so you need to give it ordered rows if you're
>>> going to use a method like the QR decomposition. That works. I don't think
>>> the QR method should ever have been on this class though, for this reason.
>>>
>>> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <
>>> iman.mohtashemi@gmail.com> wrote:
>>>
>>> Hi guys,
>>> Was this bug ever resolved?
>>> Iman
>>>
>>> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <
>>> iman.mohtashemi@gmail.com> wrote:
>>>
>>> Yes this would be helpful, otherwise the Q part of the decomposition is
>>> useless. One can use that to solve the system by transposing it and
>>> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
>>> since the Upper triangular matrix is correctly available (R)
>>>
>>> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>>>
>>> @Xiangrui / @Joseph, do you think it would be reasonable to have
>>> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
>>> order to make the ultimate output of toRowMatrix less surprising when it's
>>> not ordered?
>>>
>>>
>>> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>>>
>>> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
>>> result in a RowMatrix with rows in order of their indices, necessarily:
>>>
>>>
>>> // Drop its row indices.
>>> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>>>
>>> What you get is a matrix where the rows are arranged in whatever order
>>> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
>>> the ordering doesn't matter, but then it's maybe surprising it has a QR
>>> decomposition method, because clearly the result depends on the order of
>>> rows in the input. (CC Yuhao Yang for a comment?)
>>>
>>> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
>>> least something with sorted rows? that would not be hard. It also won't
>>> return "missing" rows (all zeroes), so it would not in any event result in
>>> a RowMatrix whose implicit rows and ordering represented the same matrix.
>>> That, at least, strikes me as something to be better documented.
>>>
>>> Maybe it would be nicer still to at least sort the rows, given the
>>> existence of use cases like yours. For example, at least
>>> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>>>
>>> In any event you should be able to make it work by manually getting the
>>> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
>>> to Vectors and making a RowMatrix from it.
>>>
>>>
>>>
>>> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <
>>> iman.mohtashemi@gmail.com> wrote:
>>>
>>> Hi Sean,
>>> Here you go:
>>>
>>> sparsematrix.txt =
>>>
>>> row, col ,val
>>> 0,0,.42
>>> 0,1,.28
>>> 0,2,.89
>>> 1,0,.83
>>> 1,1,.34
>>> 1,2,.42
>>> 2,0,.23
>>> 3,0,.42
>>> 3,1,.98
>>> 3,2,.88
>>> 4,0,.23
>>> 4,1,.36
>>> 4,2,.97
>>>
>>> The vector is just the third column of the matrix which should give the
>>> trivial solution of [0,0,1]
>>>
>>> This translates to this which is correct
>>> There are zeros in the matrix (Not really sparse but just an example)
>>> 0.42  0.28  0.89
>>> 0.83  0.34  0.42
>>> 0.23  0.0   0.0
>>> 0.42  0.98  0.88
>>> 0.23  0.36  0.97
>>>
>>>
>>> Here is what I get for  the Q and R
>>>
>>> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
>>> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
>>> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
>>> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
>>> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
>>> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
>>> 0.0                  0.7662808691141717   0.7553315911660984
>>> 0.0                  0.0                  0.7785210939368136
>>>
>>> When running this in matlab the numbers are the same but row 1 is the
>>> last row and the last row is interchanged with row 3
>>>
>>>
>>>
>>> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>>>
>>> Rather than post a large section of code, please post a small example of
>>> the input matrix and its decomposition, to illustrate what you're saying is
>>> out of order.
>>>
>>> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>>>
>>> I am getting the correct rows but they are out of order. Is this a bug
>>> or am
>>> I doing something wrong?
>>>
>>>
>>>
>>>
>

Re: TallSkinnyQR

Posted by Huamin Li <3e...@gmail.com>.
Hi Iman,

You can get my code from https://github.com/hl475/svd/tree/testSVD. In
addition to fixing the index issue for IndexedRowMatrix (
https://issues.apache.org/jira/browse/SPARK-8614), I have made the
following changes as well:

(1) Add tallSkinnySVD and computeSVDbyGram to indexedRowMatrix.
(2) Add shuffle.scala to
mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/ (you need
this if you want to use
tallSkinnySVD). There was a bug about shuffle method in breeze, and I sent
the pull request to https://github.com/scalanlp/breeze/pull/571. However,
the pull request has been merged to breeze 0.13, whereas the version of
breeze for current Spark is 0.12.
(3) Add partialSVD to BlockMatrix which computes the randomized singular
value decomposition of a given BlockMatrix.

The new SVD methods (tallSkinnySVD, computeSVDbyGram, and partialSVD) are
in beta version right now. You are totally welcome to test it and share the
feedback with me!

I implemented these codes for my summer intern project with Mark Tygert,
and we are currently testing the performance of the new codes.

Best,
Huamin

On Fri, Dec 2, 2016 at 2:07 PM, Iman Mohtashemi <im...@gmail.com>
wrote:

> Great thanks! Where can I get the latest with the bug fixes?
> best regards,
> Iman
>
> On Fri, Dec 2, 2016 at 10:54 AM Huamin Li <3e...@gmail.com> wrote:
>
>> Hi,
>>
>> There seems to be a bug in the section of code that converts the
>> RowMatrix format back into indexedRowMatrix format.
>>
>> For RowMatrix, I think the singular values and right singular vectors
>> (not the left singular vectors U) that computeSVD computes are correct when
>> using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
>> is correct when using multiple executors/machines. U and Q were being
>> stored in RowMatrix format. There is no index information about RowMatrix,
>> so it does not make sense for U and Q.
>>
>> Others have run into this same problem
>> (https://issues.apache.org/jira/browse/SPARK-8614)
>>
>> I think the quick solution for this problem is copy and paste the multiply,
>> computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix and
>> make the corresponding changes although this would result in code
>> duplication.
>>
>> I have fixed the problem by what I mentioned above. Now, multiply,
>> computeSVD, and tallSkinnyQR are giving the correct results for
>> indexedRowMatrix when using multiple executors or workers. Let me know
>> if I should do a pull request for this.
>>
>> Best,
>> Huamin
>>
>> On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>> Ok thanks.
>>
>> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>>
>> I tried, but enforcing the ordering changed a fair bit of behavior and I
>> gave up. I think the way to think of it is: a RowMatrix has whatever
>> ordering you made it with, so you need to give it ordered rows if you're
>> going to use a method like the QR decomposition. That works. I don't think
>> the QR method should ever have been on this class though, for this reason.
>>
>> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
>> wrote:
>>
>> Hi guys,
>> Was this bug ever resolved?
>> Iman
>>
>> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>> Yes this would be helpful, otherwise the Q part of the decomposition is
>> useless. One can use that to solve the system by transposing it and
>> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
>> since the Upper triangular matrix is correctly available (R)
>>
>> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>>
>> @Xiangrui / @Joseph, do you think it would be reasonable to have
>> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
>> order to make the ultimate output of toRowMatrix less surprising when it's
>> not ordered?
>>
>>
>> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
>> result in a RowMatrix with rows in order of their indices, necessarily:
>>
>>
>> // Drop its row indices.
>> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>>
>> What you get is a matrix where the rows are arranged in whatever order
>> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
>> the ordering doesn't matter, but then it's maybe surprising it has a QR
>> decomposition method, because clearly the result depends on the order of
>> rows in the input. (CC Yuhao Yang for a comment?)
>>
>> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
>> least something with sorted rows? that would not be hard. It also won't
>> return "missing" rows (all zeroes), so it would not in any event result in
>> a RowMatrix whose implicit rows and ordering represented the same matrix.
>> That, at least, strikes me as something to be better documented.
>>
>> Maybe it would be nicer still to at least sort the rows, given the
>> existence of use cases like yours. For example, at least CoordinateMatrix.toIndexedRowMatrix
>> could sort? that is less surprising.
>>
>> In any event you should be able to make it work by manually getting the
>> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
>> to Vectors and making a RowMatrix from it.
>>
>>
>>
>> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
>> wrote:
>>
>> Hi Sean,
>> Here you go:
>>
>> sparsematrix.txt =
>>
>> row, col ,val
>> 0,0,.42
>> 0,1,.28
>> 0,2,.89
>> 1,0,.83
>> 1,1,.34
>> 1,2,.42
>> 2,0,.23
>> 3,0,.42
>> 3,1,.98
>> 3,2,.88
>> 4,0,.23
>> 4,1,.36
>> 4,2,.97
>>
>> The vector is just the third column of the matrix which should give the
>> trivial solution of [0,0,1]
>>
>> This translates to this which is correct
>> There are zeros in the matrix (Not really sparse but just an example)
>> 0.42  0.28  0.89
>> 0.83  0.34  0.42
>> 0.23  0.0   0.0
>> 0.42  0.98  0.88
>> 0.23  0.36  0.97
>>
>>
>> Here is what I get for  the Q and R
>>
>> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
>> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
>> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
>> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
>> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
>> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
>> 0.0                  0.7662808691141717   0.7553315911660984
>> 0.0                  0.0                  0.7785210939368136
>>
>> When running this in matlab the numbers are the same but row 1 is the
>> last row and the last row is interchanged with row 3
>>
>>
>>
>> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> Rather than post a large section of code, please post a small example of
>> the input matrix and its decomposition, to illustrate what you're saying is
>> out of order.
>>
>> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>>
>> I am getting the correct rows but they are out of order. Is this a bug or
>> am
>> I doing something wrong?
>>
>>
>>
>>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Great thanks! Where can I get the latest with the bug fixes?
best regards,
Iman

On Fri, Dec 2, 2016 at 10:54 AM Huamin Li <3e...@gmail.com> wrote:

> Hi,
>
> There seems to be a bug in the section of code that converts the RowMatrix
> format back into indexedRowMatrix format.
>
> For RowMatrix, I think the singular values and right singular vectors
> (not the left singular vectors U) that computeSVD computes are correct when
> using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
> is correct when using multiple executors/machines. U and Q were being
> stored in RowMatrix format. There is no index information about RowMatrix,
> so it does not make sense for U and Q.
>
> Others have run into this same problem (
> https://issues.apache.org/jira/browse/SPARK-8614)
>
> I think the quick solution for this problem is copy and paste the multiply,
> computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix and
> make the corresponding changes although this would result in code
> duplication.
>
> I have fixed the problem by what I mentioned above. Now, multiply,
> computeSVD, and tallSkinnyQR are giving the correct results for
> indexedRowMatrix when using multiple executors or workers. Let me know if
> I should do a pull request for this.
>
> Best,
> Huamin
>
> On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <
> iman.mohtashemi@gmail.com> wrote:
>
> Ok thanks.
>
> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>
> I tried, but enforcing the ordering changed a fair bit of behavior and I
> gave up. I think the way to think of it is: a RowMatrix has whatever
> ordering you made it with, so you need to give it ordered rows if you're
> going to use a method like the QR decomposition. That works. I don't think
> the QR method should ever have been on this class though, for this reason.
>
> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi guys,
> Was this bug ever resolved?
> Iman
>
> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Yes this would be helpful, otherwise the Q part of the decomposition is
> useless. One can use that to solve the system by transposing it and
> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
> since the Upper triangular matrix is correctly available (R)
>
> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>
> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>
>

Re: TallSkinnyQR

Posted by Huamin Li <3e...@gmail.com>.
Hi,

There seems to be a bug in the section of code that converts the RowMatrix
format back into indexedRowMatrix format.

For RowMatrix, I think the singular values and right singular vectors (not
the left singular vectors U) that computeSVD computes are correct when
using multiple executors/machines; Only the R (not the Q) in tallSkinnyQR
is correct when using multiple executors/machines. U and Q were being
stored in RowMatrix format. There is no index information about RowMatrix,
so it does not make sense for U and Q.

Others have run into this same problem (
https://issues.apache.org/jira/browse/SPARK-8614)

I think the quick solution for this problem is copy and paste the multiply,
computeSVD, and tallSkinnyQR code from RowMatrix to IndexedRowMatrix and
make the corresponding changes although this would result in code
duplication.

I have fixed the problem by what I mentioned above. Now, multiply,
computeSVD, and tallSkinnyQR are giving the correct results for
indexedRowMatrix when using multiple executors or workers. Let me know if I
should do a pull request for this.

Best,
Huamin

On Fri, Dec 2, 2016 at 11:23 AM, Iman Mohtashemi <im...@gmail.com>
wrote:

> Ok thanks.
>
> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>
>> I tried, but enforcing the ordering changed a fair bit of behavior and I
>> gave up. I think the way to think of it is: a RowMatrix has whatever
>> ordering you made it with, so you need to give it ordered rows if you're
>> going to use a method like the QR decomposition. That works. I don't think
>> the QR method should ever have been on this class though, for this reason.
>>
>> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
>> wrote:
>>
>> Hi guys,
>> Was this bug ever resolved?
>> Iman
>>
>> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <
>> iman.mohtashemi@gmail.com> wrote:
>>
>> Yes this would be helpful, otherwise the Q part of the decomposition is
>> useless. One can use that to solve the system by transposing it and
>> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
>> since the Upper triangular matrix is correctly available (R)
>>
>> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>>
>> @Xiangrui / @Joseph, do you think it would be reasonable to have
>> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
>> order to make the ultimate output of toRowMatrix less surprising when it's
>> not ordered?
>>
>>
>> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
>> result in a RowMatrix with rows in order of their indices, necessarily:
>>
>>
>> // Drop its row indices.
>> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>>
>> What you get is a matrix where the rows are arranged in whatever order
>> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
>> the ordering doesn't matter, but then it's maybe surprising it has a QR
>> decomposition method, because clearly the result depends on the order of
>> rows in the input. (CC Yuhao Yang for a comment?)
>>
>> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
>> least something with sorted rows? that would not be hard. It also won't
>> return "missing" rows (all zeroes), so it would not in any event result in
>> a RowMatrix whose implicit rows and ordering represented the same matrix.
>> That, at least, strikes me as something to be better documented.
>>
>> Maybe it would be nicer still to at least sort the rows, given the
>> existence of use cases like yours. For example, at least CoordinateMatrix.toIndexedRowMatrix
>> could sort? that is less surprising.
>>
>> In any event you should be able to make it work by manually getting the
>> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
>> to Vectors and making a RowMatrix from it.
>>
>>
>>
>> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
>> wrote:
>>
>> Hi Sean,
>> Here you go:
>>
>> sparsematrix.txt =
>>
>> row, col ,val
>> 0,0,.42
>> 0,1,.28
>> 0,2,.89
>> 1,0,.83
>> 1,1,.34
>> 1,2,.42
>> 2,0,.23
>> 3,0,.42
>> 3,1,.98
>> 3,2,.88
>> 4,0,.23
>> 4,1,.36
>> 4,2,.97
>>
>> The vector is just the third column of the matrix which should give the
>> trivial solution of [0,0,1]
>>
>> This translates to this which is correct
>> There are zeros in the matrix (Not really sparse but just an example)
>> 0.42  0.28  0.89
>> 0.83  0.34  0.42
>> 0.23  0.0   0.0
>> 0.42  0.98  0.88
>> 0.23  0.36  0.97
>>
>>
>> Here is what I get for  the Q and R
>>
>> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
>> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
>> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
>> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
>> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
>> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
>> 0.0                  0.7662808691141717   0.7553315911660984
>> 0.0                  0.0                  0.7785210939368136
>>
>> When running this in matlab the numbers are the same but row 1 is the
>> last row and the last row is interchanged with row 3
>>
>>
>>
>> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> Rather than post a large section of code, please post a small example of
>> the input matrix and its decomposition, to illustrate what you're saying is
>> out of order.
>>
>> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>>
>> I am getting the correct rows but they are out of order. Is this a bug or
>> am
>> I doing something wrong?
>>
>>
>>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
I have a different question that might be trivial for you (although not to
me :)) Maybe you can answer this?

Here is a MapReduce Example implemented in Java.
It reads each line of text and, for each word in the line, determines
whether it starts with an upper-case letter. If so, it emits a key/value
pair. But in this case one line of text can emit multiple key/value pairs,
so I can't use the map function, which just returns a single Tuple2:



/**
 * Mapper that emits a {@code (word, 1)} pair for every space-separated word
 * of the input line whose first character is upper case.
 *
 * <p>A single input line may produce zero, one, or many output pairs.
 */
public class CountUppercaseMapper
    extends Mapper<LongWritable,Text,Text,IntWritable> {
  /**
   * Splits {@code line} on single spaces and writes {@code (word, 1)} to
   * {@code context} for each word that begins with an upper-case character.
   *
   * @param lineNumber key of the input record (not used by this mapper)
   * @param line       the text of the input record
   * @param context    sink that receives the emitted pairs
   * @throws IOException          if {@code context.write} fails
   * @throws InterruptedException if {@code context.write} is interrupted
   */
  @Override
  protected void map(LongWritable lineNumber, Text line, Context context)
      throws IOException, InterruptedException {
    for (String word : line.toString().split(" ")) {
      // split(" ") produces empty tokens for a blank line and for leading or
      // repeated spaces; charAt(0) on an empty token would throw
      // StringIndexOutOfBoundsException, so skip those tokens.
      if (!word.isEmpty() && Character.isUpperCase(word.charAt(0))) {
        context.write(new Text(word), new IntWritable(1));
      }
    }
  }
}

What is the equivalent spark implementation?

A more use-case specific example below with objects:

In this case, the mapper emits multiple key:value pairs that are
(String,String)

What is the equivalent spark implementation?

import java.io.IOException;

/**
 * Mapper that converts one input line into zero or more (Text, Text) pairs,
 * one pair per isotope cluster that a {@code Detector} returns for the line.
 */
public class IsotopeClusterMapper
        extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Runs a fresh {@code Detector} over the text of {@code value} and
         * writes one key/value pair to {@code context} for each cluster found.
         *
         * @param key input key; not used by this mapper
         * @param value the input line handed to the detector
         * @param context sink for the emitted key/value pairs
         * @throws IOException if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                System.out.println("Inside Isotope Cluster Map !");
                String text = value.toString();

                // A new detector is created for every record.
                Detector clusterDetector = new Detector();
                ArrayList<IsotopeCluster> found = clusterDetector.GetClusters(text);

                // Emit each cluster as a (key, value) pair of strings
                // produced by the detector.
                for (IsotopeCluster cluster : found) {
                        String outKey = clusterDetector.WriteClusterKey(cluster);
                        String outValue = clusterDetector.WriteClusterValue(cluster);
                        context.write(new Text(outKey), new Text(outValue));
                }
        }
}



On Fri, Dec 2, 2016 at 8:23 AM Iman Mohtashemi <im...@gmail.com>
wrote:

> Ok thanks.
>
> On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:
>
> I tried, but enforcing the ordering changed a fair bit of behavior and I
> gave up. I think the way to think of it is: a RowMatrix has whatever
> ordering you made it with, so you need to give it ordered rows if you're
> going to use a method like the QR decomposition. That works. I don't think
> the QR method should ever have been on this class though, for this reason.
>
> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi guys,
> Was this bug ever resolved?
> Iman
>
> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Yes this would be helpful, otherwise the Q part of the decomposition is
> useless. One can use that to solve the system by transposing it and
> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
> since the Upper triangular matrix is correctly available (R)
>
> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>
> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Ok thanks.

On Fri, Dec 2, 2016 at 8:19 AM Sean Owen <so...@cloudera.com> wrote:

> I tried, but enforcing the ordering changed a fair bit of behavior and I
> gave up. I think the way to think of it is: a RowMatrix has whatever
> ordering you made it with, so you need to give it ordered rows if you're
> going to use a method like the QR decomposition. That works. I don't think
> the QR method should ever have been on this class though, for this reason.
>
> On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi guys,
> Was this bug ever resolved?
> Iman
>
> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Yes this would be helpful, otherwise the Q part of the decomposition is
> useless. One can use that to solve the system by transposing it and
> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
> since the Upper triangular matrix is correctly available (R)
>
> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>
> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Sean Owen <so...@cloudera.com>.
I tried, but enforcing the ordering changed a fair bit of behavior and I
gave up. I think the way to think of it is: a RowMatrix has whatever
ordering you made it with, so you need to give it ordered rows if you're
going to use a method like the QR decomposition. That works. I don't think
the QR method should ever have been on this class though, for this reason.

On Fri, Dec 2, 2016 at 4:13 PM Iman Mohtashemi <im...@gmail.com>
wrote:

> Hi guys,
> Was this bug ever resolved?
> Iman
>
> On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Yes this would be helpful, otherwise the Q part of the decomposition is
> useless. One can use that to solve the system by transposing it and
> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
> since the Upper triangular matrix is correctly available (R)
>
> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>
> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Hi guys,
Was this bug ever resolved?
Iman

On Fri, Nov 11, 2016 at 9:59 AM Iman Mohtashemi <im...@gmail.com>
wrote:

> Yes this would be helpful, otherwise the Q part of the decomposition is
> useless. One can use that to solve the system by transposing it and
> multiplying with b and solving for x  (Ax = b) where A = R and b = Qt*b
> since the Upper triangular matrix is correctly available (R)
>
> On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:
>
> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Yes, this would be helpful; otherwise the Q part of the decomposition is
useless. One can use Q to solve the system Ax = b: transpose Q, multiply it
by b, and solve the triangular system R x = Qt*b for x, since the upper
triangular matrix (R) is correctly available.

On Fri, Nov 11, 2016 at 3:56 AM Sean Owen <so...@cloudera.com> wrote:

> @Xiangrui / @Joseph, do you think it would be reasonable to have
> CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
> order to make the ultimate output of toRowMatrix less surprising when it's
> not ordered?
>
>
> On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:
>
> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Sean Owen <so...@cloudera.com>.
@Xiangrui / @Joseph, do you think it would be reasonable to have
CoordinateMatrix sort the rows it creates to make an IndexedRowMatrix? in
order to make the ultimate output of toRowMatrix less surprising when it's
not ordered?

On Tue, Nov 8, 2016 at 3:29 PM Sean Owen <so...@cloudera.com> wrote:

> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least
> CoordinateMatrix.toIndexedRowMatrix could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Thanks Sean! Let me take a look!
Iman

On Nov 8, 2016 7:29 AM, "Sean Owen" <so...@cloudera.com> wrote:

> I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
> result in a RowMatrix with rows in order of their indices, necessarily:
>
> // Drop its row indices.
> RowMatrix rowMat = indexedRowMatrix.toRowMatrix();
>
> What you get is a matrix where the rows are arranged in whatever order
> they were passed to IndexedRowMatrix. RowMatrix says it's for rows where
> the ordering doesn't matter, but then it's maybe surprising it has a QR
> decomposition method, because clearly the result depends on the order of
> rows in the input. (CC Yuhao Yang for a comment?)
>
> You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
> least something with sorted rows? that would not be hard. It also won't
> return "missing" rows (all zeroes), so it would not in any event result in
> a RowMatrix whose implicit rows and ordering represented the same matrix.
> That, at least, strikes me as something to be better documented.
>
> Maybe it would be nicer still to at least sort the rows, given the
> existence of use cases like yours. For example, at least CoordinateMatrix.toIndexedRowMatrix
> could sort? that is less surprising.
>
> In any event you should be able to make it work by manually getting the
> RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
> to Vectors and making a RowMatrix from it.
>
>
>
> On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
> wrote:
>
>> Hi Sean,
>> Here you go:
>>
>> sparsematrix.txt =
>>
>> row, col ,val
>> 0,0,.42
>> 0,1,.28
>> 0,2,.89
>> 1,0,.83
>> 1,1,.34
>> 1,2,.42
>> 2,0,.23
>> 3,0,.42
>> 3,1,.98
>> 3,2,.88
>> 4,0,.23
>> 4,1,.36
>> 4,2,.97
>>
>> The vector is just the third column of the matrix which should give the
>> trivial solution of [0,0,1]
>>
>> This translates to this which is correct
>> There are zeros in the matrix (Not really sparse but just an example)
>> 0.42  0.28  0.89
>> 0.83  0.34  0.42
>> 0.23  0.0   0.0
>> 0.42  0.98  0.88
>> 0.23  0.36  0.97
>>
>>
>> Here is what I get for  the Q and R
>>
>> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
>> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
>> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
>> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
>> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
>> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
>> 0.0                  0.7662808691141717   0.7553315911660984
>> 0.0                  0.0                  0.7785210939368136
>>
>> When running this in matlab the numbers are the same but row 1 is the
>> last row and the last row is interchanged with row 3
>>
>>
>>
>> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>>
>> Rather than post a large section of code, please post a small example of
>> the input matrix and its decomposition, to illustrate what you're saying is
>> out of order.
>>
>> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>>
>> I am getting the correct rows but they are out of order. Is this a bug or
>> am
>> I doing something wrong?
>>
>>
>>

Re: TallSkinnyQR

Posted by Sean Owen <so...@cloudera.com>.
I think the problem here is that IndexedRowMatrix.toRowMatrix does *not*
result in a RowMatrix with rows in order of their indices, necessarily:

// Drop its row indices.
RowMatrix rowMat = indexedRowMatrix.toRowMatrix();

What you get is a matrix where the rows are arranged in whatever order they
were passed to IndexedRowMatrix. RowMatrix says it's for rows where the
ordering doesn't matter, but then it's maybe surprising it has a QR
decomposition method, because clearly the result depends on the order of
rows in the input. (CC Yuhao Yang for a comment?)

You could say, well, why doesn't IndexedRowMatrix.toRowMatrix return at
least something with sorted rows? That would not be hard. However, it also
won't return "missing" rows (all zeroes), so even with sorting it would not
result in a RowMatrix whose implicit rows and ordering represent the same
matrix. That, at least, strikes me as something that should be better
documented.

Maybe it would be nicer still to at least sort the rows, given the
existence of use cases like yours. For example, perhaps
CoordinateMatrix.toIndexedRowMatrix could sort its rows? That would be
less surprising.

In any event you should be able to make it work by manually getting the
RDD[IndexedRow] out of IndexedRowMatrix, sorting by index, then mapping it
to Vectors and making a RowMatrix from it.



On Tue, Nov 8, 2016 at 2:41 PM Iman Mohtashemi <im...@gmail.com>
wrote:

> Hi Sean,
> Here you go:
>
> sparsematrix.txt =
>
> row, col ,val
> 0,0,.42
> 0,1,.28
> 0,2,.89
> 1,0,.83
> 1,1,.34
> 1,2,.42
> 2,0,.23
> 3,0,.42
> 3,1,.98
> 3,2,.88
> 4,0,.23
> 4,1,.36
> 4,2,.97
>
> The vector is just the third column of the matrix which should give the
> trivial solution of [0,0,1]
>
> This translates to this which is correct
> There are zeros in the matrix (Not really sparse but just an example)
> 0.42  0.28  0.89
> 0.83  0.34  0.42
> 0.23  0.0   0.0
> 0.42  0.98  0.88
> 0.23  0.36  0.97
>
>
> Here is what I get for  the Q and R
>
> Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
> -0.3920784235278427   -0.06171221388256143  0.5847874866876442
> -0.7748216464954987   -0.4003560542230838   -0.29392323671555354
> -0.3920784235278427   0.8517909521421976    -0.31435038559403217
> -0.21470961288429483  -0.23389547730301666  -0.11165321782745863
> R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
> 0.0                  0.7662808691141717   0.7553315911660984
> 0.0                  0.0                  0.7785210939368136
>
> When running this in matlab the numbers are the same but row 1 is the last
> row and the last row is interchanged with row 3
>
>
>
> On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:
>
> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Iman Mohtashemi <im...@gmail.com>.
Hi Sean,
Here you go:

sparsematrix.txt =

row, col ,val
0,0,.42
0,1,.28
0,2,.89
1,0,.83
1,1,.34
1,2,.42
2,0,.23
3,0,.42
3,1,.98
3,2,.88
4,0,.23
4,1,.36
4,2,.97

The vector is just the third column of the matrix, which should give the
trivial solution of [0,0,1].

This translates to the following dense matrix, which is correct.
There are zeros in the matrix (not really sparse, but just an example):
0.42  0.28  0.89
0.83  0.34  0.42
0.23  0.0   0.0
0.42  0.98  0.88
0.23  0.36  0.97


Here is what I get for the Q and R:

Q: -0.21470961288429483  0.23590615093828807   0.6784910613691661
-0.3920784235278427   -0.06171221388256143  0.5847874866876442
-0.7748216464954987   -0.4003560542230838   -0.29392323671555354
-0.3920784235278427   0.8517909521421976    -0.31435038559403217
-0.21470961288429483  -0.23389547730301666  -0.11165321782745863
R: -1.0712142642814275  -0.8347536340918976  -1.227672225670157
0.0                  0.7662808691141717   0.7553315911660984
0.0                  0.0                  0.7785210939368136

When running this in MATLAB, the numbers are the same, but row 1 is the last
row and the last row is interchanged with row 3.



On Mon, Nov 7, 2016 at 11:35 PM Sean Owen <so...@cloudera.com> wrote:

> Rather than post a large section of code, please post a small example of
> the input matrix and its decomposition, to illustrate what you're saying is
> out of order.
>
> On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:
>
> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>

Re: TallSkinnyQR

Posted by Sean Owen <so...@cloudera.com>.
Rather than post a large section of code, please post a small example of
the input matrix and its decomposition, to illustrate what you're saying is
out of order.

On Tue, Nov 8, 2016 at 3:50 AM im281 <im...@gmail.com> wrote:

> I am getting the correct rows but they are out of order. Is this a bug or
> am
> I doing something wrong?
>
>
>