You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-user@hadoop.apache.org by ykj <yk...@163.com> on 2009/05/20 09:08:49 UTC

Hama Problem

Hello,everyone 

       I am new to hama. in our project ,my team leader let me upload  old
code, run it on hadoop with parallel matrix computation.this is old code:

public class EigenFaceGenerator {
	
	Matrix 	averageFace;		//stores the average face useful when probing the
database
    Matrix 	eigVectors;			//stores all the sorted eigen vectors from the
training set
	Matrix 	eigValues;			//Stores all the sorted eigen Values from the training
set
	boolean trained = false;	//has a training set been provided yet?
	int 	numEigenVecs = 0;	//number of eigen vectors availiable
	
	/**
	 * 
	 * @param faces array of pictures to be used for the training
	 * @param progress 
	 */
	public void processTrainingSet(Face[] faces)
	{
		//TODO : there are errors that can be thrown when no fiels are parsed into
here
		//TODO : Check that all the images are the same size

		/**
		 * STEP 1
		 * 		Read in the images, flatten them out into one row of values, and
stack in a big matrix
		 */
		double[][] dpix = new
double[faces.length][faces[0].picture.getImagePixels().length];
		
		for(int i=0; i<faces.length; i++)
		{		//for each picture in the set
			double[] pixels = faces[i].picture.getImagePixels();
			for (int j=0; j<pixels.length; j++)
			{
				dpix[i][j] = pixels[j];
			}
		}
		//make matrix of stacked flattened images
		Matrix matrix = new Matrix(dpix);
		
		
		/**
		 * STEP 2
		 * 		Calculate the average face, and then take this away from each of the
image
		 * 		effectivly calculating the difference form the average.
		 */
		//compute the average image
		averageFace = new Matrix(1,matrix.getColumnDimension());
		for(int i=0; i<matrix.getRowDimension(); i++)
		{
			averageFace.plusEquals(matrix.getMatrix(i,i,0,matrix.getColumnDimension()
- 1));
		}
		averageFace.timesEquals(1.0/(double)matrix.getRowDimension());	//divide by
the number of pixels to get the average
		Matrix bigAvg = new Matrix(matrix.getRowDimension(),
matrix.getColumnDimension());
		for (int i=0; i<bigAvg.getRowDimension(); i++)
		{
			bigAvg.setMatrix(i,i,0,bigAvg.getColumnDimension()-1,averageFace);
		}
		// Compute the diference form the average face for each image
		Matrix A = matrix.minus(bigAvg).transpose();
		
		
		/**
		 * STEP 3
		 * 		Now compute the the patternwise (nexamp x nexamp) covariance matrix
		 */		
		// TODO : for the presentation work out why this is done, and what it's
telling us
		Matrix At = A.transpose();
	    Matrix L = At.times(A);

		
		/**
		 * STEP 4
		 * 		Calculate the eigen values and vectors of this covariance matrix
		 * 
		 * 		% Get the eigenvectors (columns of Vectors) and eigenvalues (diag of
Values)
		 */
		EigenvalueDecomposition eigen = L.eig();
		eigValues 	= eigen.getD();
		eigVectors 	= eigen.getV();
		
		
		/**
		 * STEP 5
		 * 		% Sort the vectors/values according to size of eigenvalue
		 */
		Matrix[] eigDVSorted = sortem(eigValues, eigVectors);
		eigValues = eigDVSorted[0];
		eigVectors = eigDVSorted[1];
			
		
		/**
		 * STEP 6
		 * 		% Convert the eigenvectors of A'*A into eigenvectors of A*A'
		 */

		eigVectors = A.times(eigVectors);
		
		
		/**
		 * STEP 7
		 * 		% Get the eigenvalues out of the diagonal matrix and
		 *		% normalize them so the evalues are specifically for cov(A'), not
A*A'.
		 */
		double[] values = diag(eigValues);
		for(int i = 0; i < values.length; i++)
			values[i] /= A.getColumnDimension() - 1;
		
		
		/**
		 * STEP 8
		 * 		% Normalize Vectors to unit length, kill vectors corr. to tiny
evalues
		 */
		numEigenVecs = 0;
		for(int i = 0; i < eigVectors.getColumnDimension(); i++) {
			Matrix tmp;
			if (values[i] < 0.0001)
			{
				tmp = new Matrix(eigVectors.getRowDimension(),1);
			}
			else
			{
				tmp = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1,i,i).times(
						1 / eigVectors.getMatrix(0, eigVectors.getRowDimension() - 1, i,
i).normF());
				numEigenVecs++;
			}
			eigVectors.setMatrix(0,eigVectors.getRowDimension()-1,i,i,tmp);
			//eigVectors.timesEquals(1 / eigVectors.getMatrix(0,
eigVectors.getRowDimension() - 1, i, i).normInf());
		}
		eigVectors = eigVectors.getMatrix(0,eigVectors.getRowDimension() - 1, 0,
numEigenVecs - 1);
		
		trained = true;
        
		
		/*System.out.println("There are " + numGood + "
eigenVectors\n\nEigenVectorSize");
		System.out.println(eigVectors.getRowDimension());
		System.out.println(eigVectors.getColumnDimension());
		try {
            PrintWriter pw = new PrintWriter("c:\\tmp\\test.txt");
            eigVectors.print(pw, 8, 4);
            pw.flush();
            pw.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        
		int width = pics[0].img.getWidth(null);
		BufferedImage biAvg = imageFromMatrix(bigAvg.getArrayCopy()[0], width);
		
		try {
            saveImage(new File("c:\\tmp\\test.jpg"), biAvg);
        } catch (IOException e1) {
            e1.printStackTrace();
        }*/
	}
	
	/**
	 * Returns a number of eigenFace values to be used in a feature space
	 * @param pic
	 * @param number number of eigen feature values.  
	 * @return will be of length number or this.getNumEigenVecs whichever is
the smaller
	 */
	public double[] getEigenFaces(Picture pic, int number)
	{
		if (number > numEigenVecs)		//adjust the number to the maxium number of
eigen vectors availiable
			number = numEigenVecs;
		
		double[] ret = new double[number];
		
		double[] pixels = pic.getImagePixels();
		Matrix face = new Matrix(pixels, pixels.length);
		Matrix Vecs = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1, 0,
number-1).transpose();
		
		Matrix rslt = Vecs.times(face);
		
		for (int i=0; i<number; i++)
		{
			ret[i] = rslt.get(i,0);
		}
		
		return ret;
	}
	
	

	/**
	 * Gets the diagonal of a matrix
	 * @param M matrix
	 * @return
	 */
	private double[] diag(Matrix M) {
	    double[] dvec = new double[M.getColumnDimension()];
	    for(int i = 0; i < M.getColumnDimension(); i++)
	        dvec[i] = M.get(i, i);
	    return dvec;
	    
	}
	/**
	 * Sorts the Eigenvalues and vectors in decending order
	 * 
	 * @param D = eigen Values
	 * @param V = eigen Vectors
	 * @return
	 */
	private Matrix[] sortem(Matrix D, Matrix V) {
	    //dvec = diag(D); // get diagonal components
	    double[] dvec = diag(D);
	    		    
	    //NV = zeros(size(V));
	    
	    
	    //[dvec,index_dv] = sort(dvec); // sort dvec, maintain index in
index_dv
	    
	    class di_pair{ double value; int index; };
	    di_pair[] dvec_indexed = new di_pair[dvec.length];
	    for(int i = 0; i < dvec_indexed.length; i++) {
	        dvec_indexed[i] = new di_pair();
	        dvec_indexed[i].index = i;
	        dvec_indexed[i].value = dvec[i];
	    }
	        
	    Comparator di_pair_sort = new Comparator() {
            public int compare(Object arg0, Object arg1) {
                di_pair lt = (di_pair)arg0;
                di_pair rt = (di_pair)arg1;
                double dif = (lt.value - rt.value);
                if(dif > 0) return -1;
                if(dif < 0) return 1;
                else return 0;
            }
	    };
	    Arrays.sort(dvec_indexed, di_pair_sort);
	    
	    //index_dv = flipud(index_dv);
	    //for i = 1:size(D,1)
	    //  ND(i,i) = D(index_dv(i),index_dv(i));
	    //  NV(:,i) = V(:,index_dv(i));
	    //end;

	    Matrix D2 = new Matrix(D.getRowDimension(), D.getColumnDimension());
	    Matrix V2 = new Matrix(V.getRowDimension(), V.getColumnDimension());
	    
	    for(int i = 0; i < dvec_indexed.length; i++) {
	        D2.set(i, i, D.get(dvec_indexed[i].index, dvec_indexed[i].index));
	        int height = V.getRowDimension() - 1;
	        Matrix tmp =
V.getMatrix(dvec_indexed[i].index,dvec_indexed[i].index,0,height);
	        V2.setMatrix(i, i,0,height, tmp);
	    }
	    //TODO : Not sure why, but this has to be flipped - check this out
maybe?
	    Matrix V3 = new Matrix(V.getRowDimension(), V.getColumnDimension());
	    for (int i=0; i<V3.getRowDimension(); i++)
	    {
	    	for (int j=0; j< V3.getColumnDimension(); j++)
	    	{
	    		V3.set(i,j,V2.get(V3.getRowDimension() - i - 1,
V3.getColumnDimension() - j - 1));
	    	}
	    }
	    
	    return new Matrix[] { D2, V3 };
	}
	
	
	public boolean isTrained() {
		return trained;
	}
	public int getNumEigenVecs() {
		return numEigenVecs;
	}
}


I am not sure whether this code can easily be computed in parallel, or how to change
this code to add parallel computation. Any advice will be
appreciated. Thanks in advance.

-- 
View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23630187.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.


Re: Hama Problem

Posted by "Edward J. Yoon" <ed...@apache.org>.
I think... if we construct a small matrix, we may not need to
use HBase before storing it.

On Wed, May 20, 2009 at 11:16 PM, Edward J. Yoon <ed...@apache.org> wrote:
> //make matrix of stacked flattened images
> Matrix matrix = new Matrix(dpix);
>
> Hmm, Should we add this constructor for Image? IMO, it can be added to
> DenseMatrix, but it could be really small.
>
> AFAIK, In commons.math2, linear algebra will be added. It would be
> nice if we use it as a local version.
>
> Any comments are welcome.
>
> On Wed, May 20, 2009 at 10:53 PM, Edward J. Yoon <ed...@apache.org> wrote:
>> Hi,
>>
>>> I am not sure this code easily can be parallel computed,or how to change
>>> this code to add the parallel  compuation.any advice will  be
>>> appreciated.thanks in advance.
>>
>> OK, I'm sure it could be run on Hama/Hadoop.
>>
>> According to my understanding of your code, It's a PCA. If you have an
>> M images as represented N^2 * 1 vectors, vector will be nomalized by
>> step-2. In step-3, the covariance matrix will be constructed using
>> matrix A by deviation of images and A^T. Finally you'll get the
>> eigenfaces using eigen values/vectors.
>>
>> There are two large computing component in the step-3 and step-4, such
>> as N^2 by N^2 matrix transpose, multiplication and eigen
>> values/vectors of M by M matrix.
>>
>> Currently in Hama, multiplication and transpose were implemented. If
>> eigenvalue decomposition is implement, you can easily migrate code.
>>
>>> hama has it's own mailing list and this question is probably better
>>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>>
>> Yes, we have own mailing list. Please contact us.
>>
>> Thanks,
>> Edward
>>
>> On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
>> <ro...@gmail.com> wrote:
>>> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>>>
>>>> Hello,everyone
>>>
>>> hi
>>>
>>>>       I am new to hama. in our project ,my team leader let me upload  old
>>>> code, run it on hadoop with parallel matrix computation.
>>>
>>> hama has it's own mailing list and this question is probably better
>>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>>>
>>> thanks
>>>
>>> - robert
>>>
>>
>>
>>
>> --
>> Best Regards, Edward J. Yoon @ NHN, corp.
>> edwardyoon@apache.org
>> http://blog.udanax.org
>>
>
>
>
> --
> Best Regards, Edward J. Yoon @ NHN, corp.
> edwardyoon@apache.org
> http://blog.udanax.org
>



-- 
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org

MO,

Re: Hama Problem

Posted by "Edward J. Yoon" <ed...@apache.org>.
//make matrix of stacked flattened images
Matrix matrix = new Matrix(dpix);

Hmm, Should we add this constructor for Image? IMO, it can be added to
DenseMatrix, but it could be really small.

AFAIK, In commons.math2, linear algebra will be added. It would be
nice if we use it as a local version.

Any comments are welcome.

On Wed, May 20, 2009 at 10:53 PM, Edward J. Yoon <ed...@apache.org> wrote:
> Hi,
>
>> I am not sure this code easily can be parallel computed,or how to change
>> this code to add the parallel  compuation.any advice will  be
>> appreciated.thanks in advance.
>
> OK, I'm sure it could be run on Hama/Hadoop.
>
> According to my understanding of your code, It's a PCA. If you have an
> M images as represented N^2 * 1 vectors, vector will be nomalized by
> step-2. In step-3, the covariance matrix will be constructed using
> matrix A by deviation of images and A^T. Finally you'll get the
> eigenfaces using eigen values/vectors.
>
> There are two large computing component in the step-3 and step-4, such
> as N^2 by N^2 matrix transpose, multiplication and eigen
> values/vectors of M by M matrix.
>
> Currently in Hama, multiplication and transpose were implemented. If
> eigenvalue decomposition is implement, you can easily migrate code.
>
>> hama has it's own mailing list and this question is probably better
>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>
> Yes, we have own mailing list. Please contact us.
>
> Thanks,
> Edward
>
> On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
> <ro...@gmail.com> wrote:
>> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>>
>>> Hello,everyone
>>
>> hi
>>
>>>       I am new to hama. in our project ,my team leader let me upload  old
>>> code, run it on hadoop with parallel matrix computation.
>>
>> hama has it's own mailing list and this question is probably better
>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>>
>> thanks
>>
>> - robert
>>
>
>
>
> --
> Best Regards, Edward J. Yoon @ NHN, corp.
> edwardyoon@apache.org
> http://blog.udanax.org
>



-- 
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org

Re: Hama Problem

Posted by "Edward J. Yoon" <ed...@apache.org>.
Hi,

> I am not sure this code easily can be parallel computed,or how to change
> this code to add the parallel  compuation.any advice will  be
> appreciated.thanks in advance.

OK, I'm sure it could be run on Hama/Hadoop.

According to my understanding of your code, It's a PCA. If you have an
M images as represented N^2 * 1 vectors, vector will be nomalized by
step-2. In step-3, the covariance matrix will be constructed using
matrix A by deviation of images and A^T. Finally you'll get the
eigenfaces using eigen values/vectors.

There are two large computing component in the step-3 and step-4, such
as N^2 by N^2 matrix transpose, multiplication and eigen
values/vectors of M by M matrix.

Currently in Hama, multiplication and transpose were implemented. If
eigenvalue decomposition is implement, you can easily migrate code.

> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html

Yes, we have own mailing list. Please contact us.

Thanks,
Edward

On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
<ro...@gmail.com> wrote:
> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>
>> Hello,everyone
>
> hi
>
>>       I am new to hama. in our project ,my team leader let me upload  old
>> code, run it on hadoop with parallel matrix computation.
>
> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>
> thanks
>
> - robert
>



-- 
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org

Re: Hama Problem

Posted by "Edward J. Yoon" <ed...@apache.org>.
Hi,

> I am not sure this code easily can be parallel computed,or how to change
> this code to add the parallel  compuation.any advice will  be
> appreciated.thanks in advance.

OK, I'm sure it could be run on Hama/Hadoop.

According to my understanding of your code, It's a PCA. If you have an
M images as represented N^2 * 1 vectors, vector will be nomalized by
step-2. In step-3, the covariance matrix will be constructed using
matrix A by deviation of images and A^T. Finally you'll get the
eigenfaces using eigen values/vectors.

There are two large computing component in the step-3 and step-4, such
as N^2 by N^2 matrix transpose, multiplication and eigen
values/vectors of M by M matrix.

Currently in Hama, multiplication and transpose were implemented. If
eigenvalue decomposition is implement, you can easily migrate code.

> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html

Yes, we have own mailing list. Please contact us.

Thanks,
Edward

On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
<ro...@gmail.com> wrote:
> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>
>> Hello,everyone
>
> hi
>
>>       I am new to hama. in our project ,my team leader let me upload  old
>> code, run it on hadoop with parallel matrix computation.
>
> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>
> thanks
>
> - robert
>



-- 
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org

Re: Hama Problem

Posted by Robert Burrell Donkin <ro...@gmail.com>.
On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>
> Hello,everyone

hi

>       I am new to hama. in our project ,my team leader let me upload  old
> code, run it on hadoop with parallel matrix computation.

hama has it's own mailing list and this question is probably better
asked there. see http://incubator.apache.org/hama/mailing_lists.html

thanks

- robert

Re: Hama Problem

Posted by ykj <yk...@163.com>.

I work on this project. My team leader assigned me this task, so I have no choice. I
think the reason why we didn't choose MPI-based parallel matrix computation is
that Hadoop has become more popular. Can you give me some advice
about my question? It is strange that although I subscribed to this mailing list, I
cannot receive any
mail. I have already responded to the hama-user-help mail.
-- 
View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23699938.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.


Re: Hama Problem

Posted by "Edward J. Yoon" <ed...@apache.org>.
Hi,

Before consider this, let's talk about your problem and why do you
want to use these. If your application isn't huge then I think
MPI-based matrix package could be much helpful to you since Hama
concept also is the large-scale, not high performance for small
matrices.

And, Have you tried to subscribe/mail here:
http://incubator.apache.org/hama/mailing_lists.html

On Fri, May 22, 2009 at 4:51 PM, ykj <yk...@163.com> wrote:
>
>
> Currently in Hama, eigenvalue decomposition is not  implement.So In STEP 4,
> it is hard to migrate it.so I
>
> work out an idea to bypass it. before Step 4, I can let L be
> denseMatrix.when I come to Step 4, I can
> transform L into  submatrix.in Jama,eigenvalue decomposition is support
> although it is not parallel computing.So  I can get    eigValues ,eigVectors
> values.But after that in step 5,It need to sort two matrix.
>
> I want to use the hbase sort function.so Hwo can transform this two
> submatrix into two densematrix?
>
> or other way ?
>                /**
>                 * STEP 4
>                 *              Calculate the eigen values and vectors of this covariance matrix
>                 *
>                 *              % Get the eigenvectors (columns of Vectors) and eigenvalues (diag of
> Values)
>                 */
>                EigenvalueDecomposition eigen = L.eig();
>                eigValues       = eigen.getD();
>                eigVectors      = eigen.getV();
>
>
>                /**
>                 * STEP 5
>                 *              % Sort the vectors/values according to size of eigenvalue
>                 */
>                Matrix[] eigDVSorted = sortem(eigValues, eigVectors);
>                eigValues = eigDVSorted[0];
>                eigVectors = eigDVSorted[1];
>
>
>                /**
>                 * STEP 6
>                 *              % Convert the eigenvectors of A'*A into eigenvectors of A*A'
>                 */
>
>                eigVectors = A.times(eigVectors);
>
>
>                /**
>                 * STEP 7
>                 *              % Get the eigenvalues out of the diagonal matrix and
>                 *              % normalize them so the evalues are specifically for cov(A'), not
> A*A'.
>                 */
>                double[] values = diag(eigValues);
>                for(int i = 0; i < values.length; i++)
>                        values[i] /= A.getColumnDimension() - 1;
>
>
>                /**
>                 * STEP 8
>                 *              % Normalize Vectors to unit length, kill vectors corr. to tiny
> evalues
>                 */
>                numEigenVecs = 0;
>                for(int i = 0; i < eigVectors.getColumnDimension(); i++) {
>                        Matrix tmp;
>                        if (values[i] < 0.0001)
>                        {
>                                tmp = new Matrix(eigVectors.getRowDimension(),1);
>                        }
>                        else
>                        {
>                                tmp = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1,i,i).times(
>                                                1 / eigVectors.getMatrix(0, eigVectors.getRowDimension() - 1, i,
> i).normF());
>                                numEigenVecs++;
>                        }
>                        eigVectors.setMatrix(0,eigVectors.getRowDimension()-1,i,i,tmp);
>                        //eigVectors.timesEquals(1 / eigVectors.getMatrix(0,
> eigVectors.getRowDimension() - 1, i, i).normInf());
>                }
>                eigVectors = eigVectors.getMatrix(0,eigVectors.getRowDimension() - 1, 0,
> numEigenVecs - 1);
>
>                trained = true;
>
>
>                /*System.out.println("There are " + numGood + "
> eigenVectors\n\nEigenVectorSize");
>                System.out.println(eigVectors.getRowDimension());
>                System.out.println(eigVectors.getColumnDimension());
>                try {
>            PrintWriter pw = new PrintWriter("c:\\tmp\\test.txt");
>            eigVectors.print(pw, 8, 4);
>            pw.flush();
>            pw.close();
>        } catch (Exception e) {
>            e.printStackTrace();
>        }
>
>                int width = pics[0].img.getWidth(null);
>                BufferedImage biAvg = imageFromMatrix(bigAvg.getArrayCopy()[0], width);
>
>                try {
>            saveImage(new File("c:\\tmp\\test.jpg"), biAvg);
>        } catch (IOException e1) {
>            e1.printStackTrace();
>        }*/
>        }
>
>        /**
>         * Returns a number of eigenFace values to be used in a feature space
>         * @param pic
>         * @param number number of eigen feature values.
>         * @return will be of length number or this.getNumEigenVecs whichever is
> the smaller
>         */
>        public double[] getEigenFaces(Picture pic, int number)
>        {
>                if (number > numEigenVecs)              //adjust the number to the maxium number of
> eigen vectors availiable
>                        number = numEigenVecs;
>
>                double[] ret = new double[number];
>
>                double[] pixels = pic.getImagePixels();
>                Matrix face = new Matrix(pixels, pixels.length);
>                Matrix Vecs = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1, 0,
> number-1).transpose();
>
>                Matrix rslt = Vecs.times(face);
>
>                for (int i=0; i<number; i++)
>                {
>                        ret[i] = rslt.get(i,0);
>                }
>
>                return ret;
>        }
>
>
>
>        /**
>         * Gets the diagonal of a matrix
>         * @param M matrix
>         * @return
>         */
>        private double[] diag(Matrix M) {
>            double[] dvec = new double[M.getColumnDimension()];
>            for(int i = 0; i < M.getColumnDimension(); i++)
>                dvec[i] = M.get(i, i);
>            return dvec;
>
>        }
>        /**
>         * Sorts the Eigenvalues and vectors in decending order
>         *
>         * @param D = eigen Values
>         * @param V = eigen Vectors
>         * @return
>         */
>        private Matrix[] sortem(Matrix D, Matrix V) {
>            //dvec = diag(D); // get diagonal components
>            double[] dvec = diag(D);
>
>            //NV = zeros(size(V));
>
>
>            //[dvec,index_dv] = sort(dvec); // sort dvec, maintain index in
> index_dv
>
>            class di_pair{ double value; int index; };
>            di_pair[] dvec_indexed = new di_pair[dvec.length];
>            for(int i = 0; i < dvec_indexed.length; i++) {
>                dvec_indexed[i] = new di_pair();
>                dvec_indexed[i].index = i;
>                dvec_indexed[i].value = dvec[i];
>            }
>
>            Comparator di_pair_sort = new Comparator() {
>            public int compare(Object arg0, Object arg1) {
>                di_pair lt = (di_pair)arg0;
>                di_pair rt = (di_pair)arg1;
>                double dif = (lt.value - rt.value);
>                if(dif > 0) return -1;
>                if(dif < 0) return 1;
>                else return 0;
>            }
>            };
>            Arrays.sort(dvec_indexed, di_pair_sort);
>
>            //index_dv = flipud(index_dv);
>            //for i = 1:size(D,1)
>            //  ND(i,i) = D(index_dv(i),index_dv(i));
>            //  NV(:,i) = V(:,index_dv(i));
>            //end;
>
>            Matrix D2 = new Matrix(D.getRowDimension(), D.getColumnDimension());
>            Matrix V2 = new Matrix(V.getRowDimension(), V.getColumnDimension());
>
>            for(int i = 0; i < dvec_indexed.length; i++) {
>                D2.set(i, i, D.get(dvec_indexed[i].index, dvec_indexed[i].index));
>                int height = V.getRowDimension() - 1;
>                Matrix tmp =
> V.getMatrix(dvec_indexed[i].index,dvec_indexed[i].index,0,height);
>                V2.setMatrix(i, i,0,height, tmp);
>            }
>            //TODO : Not sure why, but this has to be flipped - check this out
> maybe?
>            Matrix V3 = new Matrix(V.getRowDimension(), V.getColumnDimension());
>            for (int i=0; i<V3.getRowDimension(); i++)
>            {
>                for (int j=0; j< V3.getColumnDimension(); j++)
>                {
>                        V3.set(i,j,V2.get(V3.getRowDimension() - i - 1,
> V3.getColumnDimension() - j - 1));
>                }
>            }
>
>            return new Matrix[] { D2, V3 };
>        }
>
>
>        public boolean isTrained() {
>                return trained;
>        }
>        public int getNumEigenVecs() {
>                return numEigenVecs;
>        }
> }
>
>
> any advice will  be appreciated.thanks in advance.
>
>
> --
> View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23666077.html
> Sent from the Hadoop core-user mailing list archive at Nabble.com.
>
>



-- 
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org

Re: Hama Problem

Posted by ykj <yk...@163.com>.

Currently in Hama, eigenvalue decomposition is not implemented, so in STEP 4
it is hard to migrate. So I

worked out an idea to bypass it: before Step 4, I can keep L as a
DenseMatrix; when I come to Step 4, I can
transform L into a SubMatrix. In Jama, eigenvalue decomposition is supported,
although it is not parallel. So I can get the eigValues and eigVectors
values. But after that, in Step 5, the two matrices need to be sorted.

I want to use the HBase sort function. So how can I transform these two
SubMatrix objects back into two DenseMatrix objects?

Or is there another way?
		/**
		 * STEP 4
		 * 		Calculate the eigen values and vectors of this covariance matrix
		 * 
		 * 		% Get the eigenvectors (columns of Vectors) and eigenvalues (diag of
Values)
		 */
		EigenvalueDecomposition eigen = L.eig();
		eigValues 	= eigen.getD();
		eigVectors 	= eigen.getV();
		
		
		/**
		 * STEP 5
		 * 		% Sort the vectors/values according to size of eigenvalue
		 */
		Matrix[] eigDVSorted = sortem(eigValues, eigVectors);
		eigValues = eigDVSorted[0];
		eigVectors = eigDVSorted[1];
			
		
		/**
		 * STEP 6
		 * 		% Convert the eigenvectors of A'*A into eigenvectors of A*A'
		 */

		eigVectors = A.times(eigVectors);
		
		
		/**
		 * STEP 7
		 * 		% Get the eigenvalues out of the diagonal matrix and
		 *		% normalize them so the evalues are specifically for cov(A'), not
A*A'.
		 */
		double[] values = diag(eigValues);
		for(int i = 0; i < values.length; i++)
			values[i] /= A.getColumnDimension() - 1;
		
		
		/**
		 * STEP 8
		 * 		% Normalize Vectors to unit length, kill vectors corr. to tiny
evalues
		 */
		numEigenVecs = 0;
		for(int i = 0; i < eigVectors.getColumnDimension(); i++) {
			Matrix tmp;
			if (values[i] < 0.0001)
			{
				tmp = new Matrix(eigVectors.getRowDimension(),1);
			}
			else
			{
				tmp = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1,i,i).times(
						1 / eigVectors.getMatrix(0, eigVectors.getRowDimension() - 1, i,
i).normF());
				numEigenVecs++;
			}
			eigVectors.setMatrix(0,eigVectors.getRowDimension()-1,i,i,tmp);
			//eigVectors.timesEquals(1 / eigVectors.getMatrix(0,
eigVectors.getRowDimension() - 1, i, i).normInf());
		}
		eigVectors = eigVectors.getMatrix(0,eigVectors.getRowDimension() - 1, 0,
numEigenVecs - 1);
		
		trained = true;
        
		
		/*System.out.println("There are " + numGood + "
eigenVectors\n\nEigenVectorSize");
		System.out.println(eigVectors.getRowDimension());
		System.out.println(eigVectors.getColumnDimension());
		try {
            PrintWriter pw = new PrintWriter("c:\\tmp\\test.txt");
            eigVectors.print(pw, 8, 4);
            pw.flush();
            pw.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
        
		int width = pics[0].img.getWidth(null);
		BufferedImage biAvg = imageFromMatrix(bigAvg.getArrayCopy()[0], width);
		
		try {
            saveImage(new File("c:\\tmp\\test.jpg"), biAvg);
        } catch (IOException e1) {
            e1.printStackTrace();
        }*/
	}
	
	/**
	 * Returns a number of eigenFace values to be used in a feature space
	 * @param pic
	 * @param number number of eigen feature values.  
	 * @return will be of length number or this.getNumEigenVecs whichever is
the smaller
	 */
	public double[] getEigenFaces(Picture pic, int number)
	{
		if (number > numEigenVecs)		//adjust the number to the maxium number of
eigen vectors availiable
			number = numEigenVecs;
		
		double[] ret = new double[number];
		
		double[] pixels = pic.getImagePixels();
		Matrix face = new Matrix(pixels, pixels.length);
		Matrix Vecs = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1, 0,
number-1).transpose();
		
		Matrix rslt = Vecs.times(face);
		
		for (int i=0; i<number; i++)
		{
			ret[i] = rslt.get(i,0);
		}
		
		return ret;
	}
	
	

	/**
	 * Gets the diagonal of a matrix
	 * @param M matrix
	 * @return
	 */
	private double[] diag(Matrix M) {
	    double[] dvec = new double[M.getColumnDimension()];
	    for(int i = 0; i < M.getColumnDimension(); i++)
	        dvec[i] = M.get(i, i);
	    return dvec;
	    
	}
	/**
	 * Sorts the Eigenvalues and vectors in decending order
	 * 
	 * @param D = eigen Values
	 * @param V = eigen Vectors
	 * @return
	 */
	private Matrix[] sortem(Matrix D, Matrix V) {
	    //dvec = diag(D); // get diagonal components
	    double[] dvec = diag(D);
	    		    
	    //NV = zeros(size(V));
	    
	    
	    //[dvec,index_dv] = sort(dvec); // sort dvec, maintain index in
index_dv
	    
	    class di_pair{ double value; int index; };
	    di_pair[] dvec_indexed = new di_pair[dvec.length];
	    for(int i = 0; i < dvec_indexed.length; i++) {
	        dvec_indexed[i] = new di_pair();
	        dvec_indexed[i].index = i;
	        dvec_indexed[i].value = dvec[i];
	    }
	        
	    Comparator di_pair_sort = new Comparator() {
            public int compare(Object arg0, Object arg1) {
                di_pair lt = (di_pair)arg0;
                di_pair rt = (di_pair)arg1;
                double dif = (lt.value - rt.value);
                if(dif > 0) return -1;
                if(dif < 0) return 1;
                else return 0;
            }
	    };
	    Arrays.sort(dvec_indexed, di_pair_sort);
	    
	    //index_dv = flipud(index_dv);
	    //for i = 1:size(D,1)
	    //  ND(i,i) = D(index_dv(i),index_dv(i));
	    //  NV(:,i) = V(:,index_dv(i));
	    //end;

	    Matrix D2 = new Matrix(D.getRowDimension(), D.getColumnDimension());
	    Matrix V2 = new Matrix(V.getRowDimension(), V.getColumnDimension());
	    
	    for(int i = 0; i < dvec_indexed.length; i++) {
	        D2.set(i, i, D.get(dvec_indexed[i].index, dvec_indexed[i].index));
	        int height = V.getRowDimension() - 1;
	        Matrix tmp =
V.getMatrix(dvec_indexed[i].index,dvec_indexed[i].index,0,height);
	        V2.setMatrix(i, i,0,height, tmp);
	    }
	    //TODO : Not sure why, but this has to be flipped - check this out
maybe?
	    Matrix V3 = new Matrix(V.getRowDimension(), V.getColumnDimension());
	    for (int i=0; i<V3.getRowDimension(); i++)
	    {
	    	for (int j=0; j< V3.getColumnDimension(); j++)
	    	{
	    		V3.set(i,j,V2.get(V3.getRowDimension() - i - 1,
V3.getColumnDimension() - j - 1));
	    	}
	    }
	    
	    return new Matrix[] { D2, V3 };
	}
	
	
	public boolean isTrained() {
		return trained;
	}
	public int getNumEigenVecs() {
		return numEigenVecs;
	}
}


any advice will  be appreciated.thanks in advance.


-- 
View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23666077.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.