You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-user@hadoop.apache.org by ykj <yk...@163.com> on 2009/05/20 09:08:49 UTC
Hama Problem
Hello,everyone
I am new to Hama. In our project, my team leader asked me to take our old
code and run it on Hadoop with parallel matrix computation. This is the old code:
public class EigenFaceGenerator {
Matrix averageFace; //stores the average face useful when probing the
database
Matrix eigVectors; //stores all the sorted eigen vectors from the
training set
Matrix eigValues; //Stores all the sorted eigen Values from the training
set
boolean trained = false; //has a training set been provided yet?
int numEigenVecs = 0; //number of eigen vectors availiable
/**
*
* @param faces array of pictures to be used for the training
* @param progress
*/
public void processTrainingSet(Face[] faces)
{
//TODO : there are errors that can be thrown when no fiels are parsed into
here
//TODO : Check that all the images are the same size
/**
* STEP 1
* Read in the images, flatten them out into one row of values, and
stack in a big matrix
*/
double[][] dpix = new
double[faces.length][faces[0].picture.getImagePixels().length];
for(int i=0; i<faces.length; i++)
{ //for each picture in the set
double[] pixels = faces[i].picture.getImagePixels();
for (int j=0; j<pixels.length; j++)
{
dpix[i][j] = pixels[j];
}
}
//make matrix of stacked flattened images
Matrix matrix = new Matrix(dpix);
/**
* STEP 2
* Calculate the average face, and then take this away from each of the
image
* effectivly calculating the difference form the average.
*/
//compute the average image
averageFace = new Matrix(1,matrix.getColumnDimension());
for(int i=0; i<matrix.getRowDimension(); i++)
{
averageFace.plusEquals(matrix.getMatrix(i,i,0,matrix.getColumnDimension()
- 1));
}
averageFace.timesEquals(1.0/(double)matrix.getRowDimension()); //divide by
the number of pixels to get the average
Matrix bigAvg = new Matrix(matrix.getRowDimension(),
matrix.getColumnDimension());
for (int i=0; i<bigAvg.getRowDimension(); i++)
{
bigAvg.setMatrix(i,i,0,bigAvg.getColumnDimension()-1,averageFace);
}
// Compute the diference form the average face for each image
Matrix A = matrix.minus(bigAvg).transpose();
/**
* STEP 3
* Now compute the the patternwise (nexamp x nexamp) covariance matrix
*/
// TODO : for the presentation work out why this is done, and what it's
telling us
Matrix At = A.transpose();
Matrix L = At.times(A);
/**
* STEP 4
* Calculate the eigen values and vectors of this covariance matrix
*
* % Get the eigenvectors (columns of Vectors) and eigenvalues (diag of
Values)
*/
EigenvalueDecomposition eigen = L.eig();
eigValues = eigen.getD();
eigVectors = eigen.getV();
/**
* STEP 5
* % Sort the vectors/values according to size of eigenvalue
*/
Matrix[] eigDVSorted = sortem(eigValues, eigVectors);
eigValues = eigDVSorted[0];
eigVectors = eigDVSorted[1];
/**
* STEP 6
* % Convert the eigenvectors of A'*A into eigenvectors of A*A'
*/
eigVectors = A.times(eigVectors);
/**
* STEP 7
* % Get the eigenvalues out of the diagonal matrix and
* % normalize them so the evalues are specifically for cov(A'), not
A*A'.
*/
double[] values = diag(eigValues);
for(int i = 0; i < values.length; i++)
values[i] /= A.getColumnDimension() - 1;
/**
* STEP 8
* % Normalize Vectors to unit length, kill vectors corr. to tiny
evalues
*/
numEigenVecs = 0;
for(int i = 0; i < eigVectors.getColumnDimension(); i++) {
Matrix tmp;
if (values[i] < 0.0001)
{
tmp = new Matrix(eigVectors.getRowDimension(),1);
}
else
{
tmp = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1,i,i).times(
1 / eigVectors.getMatrix(0, eigVectors.getRowDimension() - 1, i,
i).normF());
numEigenVecs++;
}
eigVectors.setMatrix(0,eigVectors.getRowDimension()-1,i,i,tmp);
//eigVectors.timesEquals(1 / eigVectors.getMatrix(0,
eigVectors.getRowDimension() - 1, i, i).normInf());
}
eigVectors = eigVectors.getMatrix(0,eigVectors.getRowDimension() - 1, 0,
numEigenVecs - 1);
trained = true;
/*System.out.println("There are " + numGood + "
eigenVectors\n\nEigenVectorSize");
System.out.println(eigVectors.getRowDimension());
System.out.println(eigVectors.getColumnDimension());
try {
PrintWriter pw = new PrintWriter("c:\\tmp\\test.txt");
eigVectors.print(pw, 8, 4);
pw.flush();
pw.close();
} catch (Exception e) {
e.printStackTrace();
}
int width = pics[0].img.getWidth(null);
BufferedImage biAvg = imageFromMatrix(bigAvg.getArrayCopy()[0], width);
try {
saveImage(new File("c:\\tmp\\test.jpg"), biAvg);
} catch (IOException e1) {
e1.printStackTrace();
}*/
}
/**
* Returns a number of eigenFace values to be used in a feature space
* @param pic
* @param number number of eigen feature values.
* @return will be of length number or this.getNumEigenVecs whichever is
the smaller
*/
public double[] getEigenFaces(Picture pic, int number)
{
if (number > numEigenVecs) //adjust the number to the maxium number of
eigen vectors availiable
number = numEigenVecs;
double[] ret = new double[number];
double[] pixels = pic.getImagePixels();
Matrix face = new Matrix(pixels, pixels.length);
Matrix Vecs = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1, 0,
number-1).transpose();
Matrix rslt = Vecs.times(face);
for (int i=0; i<number; i++)
{
ret[i] = rslt.get(i,0);
}
return ret;
}
/**
* Gets the diagonal of a matrix
* @param M matrix
* @return
*/
private double[] diag(Matrix M) {
double[] dvec = new double[M.getColumnDimension()];
for(int i = 0; i < M.getColumnDimension(); i++)
dvec[i] = M.get(i, i);
return dvec;
}
/**
* Sorts the Eigenvalues and vectors in decending order
*
* @param D = eigen Values
* @param V = eigen Vectors
* @return
*/
private Matrix[] sortem(Matrix D, Matrix V) {
//dvec = diag(D); // get diagonal components
double[] dvec = diag(D);
//NV = zeros(size(V));
//[dvec,index_dv] = sort(dvec); // sort dvec, maintain index in
index_dv
class di_pair{ double value; int index; };
di_pair[] dvec_indexed = new di_pair[dvec.length];
for(int i = 0; i < dvec_indexed.length; i++) {
dvec_indexed[i] = new di_pair();
dvec_indexed[i].index = i;
dvec_indexed[i].value = dvec[i];
}
Comparator di_pair_sort = new Comparator() {
public int compare(Object arg0, Object arg1) {
di_pair lt = (di_pair)arg0;
di_pair rt = (di_pair)arg1;
double dif = (lt.value - rt.value);
if(dif > 0) return -1;
if(dif < 0) return 1;
else return 0;
}
};
Arrays.sort(dvec_indexed, di_pair_sort);
//index_dv = flipud(index_dv);
//for i = 1:size(D,1)
// ND(i,i) = D(index_dv(i),index_dv(i));
// NV(:,i) = V(:,index_dv(i));
//end;
Matrix D2 = new Matrix(D.getRowDimension(), D.getColumnDimension());
Matrix V2 = new Matrix(V.getRowDimension(), V.getColumnDimension());
for(int i = 0; i < dvec_indexed.length; i++) {
D2.set(i, i, D.get(dvec_indexed[i].index, dvec_indexed[i].index));
int height = V.getRowDimension() - 1;
Matrix tmp =
V.getMatrix(dvec_indexed[i].index,dvec_indexed[i].index,0,height);
V2.setMatrix(i, i,0,height, tmp);
}
//TODO : Not sure why, but this has to be flipped - check this out
maybe?
Matrix V3 = new Matrix(V.getRowDimension(), V.getColumnDimension());
for (int i=0; i<V3.getRowDimension(); i++)
{
for (int j=0; j< V3.getColumnDimension(); j++)
{
V3.set(i,j,V2.get(V3.getRowDimension() - i - 1,
V3.getColumnDimension() - j - 1));
}
}
return new Matrix[] { D2, V3 };
}
public boolean isTrained() {
return trained;
}
public int getNumEigenVecs() {
return numEigenVecs;
}
}
I am not sure whether this code can easily be parallelized, or how to change
this code to add parallel computation. Any advice will be
appreciated. Thanks in advance.
--
View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23630187.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.
Re: Hama Problem
Posted by "Edward J. Yoon" <ed...@apache.org>.
I think... If we construct the small matrix, we maybe don't need to
use the hbase before the store it.
On Wed, May 20, 2009 at 11:16 PM, Edward J. Yoon <ed...@apache.org> wrote:
> //make matrix of stacked flattened images
> Matrix matrix = new Matrix(dpix);
>
> Hmm, Should we add this constructor for Image? IMO, it can be added to
> DenseMatrix, but it could be really small.
>
> AFAIK, In commons.math2, linear algebra will be added. It would be
> nice if we use it as a local version.
>
> Any comments are welcome.
>
> On Wed, May 20, 2009 at 10:53 PM, Edward J. Yoon <ed...@apache.org> wrote:
>> Hi,
>>
>>> I am not sure this code easily can be parallel computed,or how to change
>>> this code to add the parallel compuation.any advice will be
>>> appreciated.thanks in advance.
>>
>> OK, I'm sure it could be run on Hama/Hadoop.
>>
>> According to my understanding of your code, It's a PCA. If you have an
>> M images as represented N^2 * 1 vectors, vector will be nomalized by
>> step-2. In step-3, the covariance matrix will be constructed using
>> matrix A by deviation of images and A^T. Finally you'll get the
>> eigenfaces using eigen values/vectors.
>>
>> There are two large computing component in the step-3 and step-4, such
>> as N^2 by N^2 matrix transpose, multiplication and eigen
>> values/vectors of M by M matrix.
>>
>> Currently in Hama, multiplication and transpose were implemented. If
>> eigenvalue decomposition is implement, you can easily migrate code.
>>
>>> hama has it's own mailing list and this question is probably better
>>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>>
>> Yes, we have own mailing list. Please contact us.
>>
>> Thanks,
>> Edward
>>
>> On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
>> <ro...@gmail.com> wrote:
>>> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>>>
>>>> Hello,everyone
>>>
>>> hi
>>>
>>>> I am new to hama. in our project ,my team leader let me upload old
>>>> code, run it on hadoop with parallel matrix computation.
>>>
>>> hama has it's own mailing list and this question is probably better
>>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>>>
>>> thanks
>>>
>>> - robert
>>>
>>
>>
>>
>> --
>> Best Regards, Edward J. Yoon @ NHN, corp.
>> edwardyoon@apache.org
>> http://blog.udanax.org
>>
>
>
>
> --
> Best Regards, Edward J. Yoon @ NHN, corp.
> edwardyoon@apache.org
> http://blog.udanax.org
>
--
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org
MO,
Re: Hama Problem
Posted by "Edward J. Yoon" <ed...@apache.org>.
//make matrix of stacked flattened images
Matrix matrix = new Matrix(dpix);
Hmm, Should we add this constructor for Image? IMO, it can be added to
DenseMatrix, but it could be really small.
AFAIK, In commons.math2, linear algebra will be added. It would be
nice if we use it as a local version.
Any comments are welcome.
On Wed, May 20, 2009 at 10:53 PM, Edward J. Yoon <ed...@apache.org> wrote:
> Hi,
>
>> I am not sure this code easily can be parallel computed,or how to change
>> this code to add the parallel compuation.any advice will be
>> appreciated.thanks in advance.
>
> OK, I'm sure it could be run on Hama/Hadoop.
>
> According to my understanding of your code, It's a PCA. If you have an
> M images as represented N^2 * 1 vectors, vector will be nomalized by
> step-2. In step-3, the covariance matrix will be constructed using
> matrix A by deviation of images and A^T. Finally you'll get the
> eigenfaces using eigen values/vectors.
>
> There are two large computing component in the step-3 and step-4, such
> as N^2 by N^2 matrix transpose, multiplication and eigen
> values/vectors of M by M matrix.
>
> Currently in Hama, multiplication and transpose were implemented. If
> eigenvalue decomposition is implement, you can easily migrate code.
>
>> hama has it's own mailing list and this question is probably better
>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>
> Yes, we have own mailing list. Please contact us.
>
> Thanks,
> Edward
>
> On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
> <ro...@gmail.com> wrote:
>> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>>
>>> Hello,everyone
>>
>> hi
>>
>>> I am new to hama. in our project ,my team leader let me upload old
>>> code, run it on hadoop with parallel matrix computation.
>>
>> hama has it's own mailing list and this question is probably better
>> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>>
>> thanks
>>
>> - robert
>>
>
>
>
> --
> Best Regards, Edward J. Yoon @ NHN, corp.
> edwardyoon@apache.org
> http://blog.udanax.org
>
--
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org
Re: Hama Problem
Posted by "Edward J. Yoon" <ed...@apache.org>.
Hi,
> I am not sure this code easily can be parallel computed,or how to change
> this code to add the parallel compuation.any advice will be
> appreciated.thanks in advance.
OK, I'm sure it could be run on Hama/Hadoop.
According to my understanding of your code, it's a PCA. If you have
M images represented as N^2 * 1 vectors, each vector will be normalized by
step-2. In step-3, the covariance matrix will be constructed using
matrix A by deviation of images and A^T. Finally you'll get the
eigenfaces using eigen values/vectors.
There are two large computing component in the step-3 and step-4, such
as N^2 by N^2 matrix transpose, multiplication and eigen
values/vectors of M by M matrix.
Currently in Hama, multiplication and transpose were implemented. If
eigenvalue decomposition is implement, you can easily migrate code.
> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html
Yes, we have own mailing list. Please contact us.
Thanks,
Edward
On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
<ro...@gmail.com> wrote:
> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>
>> Hello,everyone
>
> hi
>
>> I am new to hama. in our project ,my team leader let me upload old
>> code, run it on hadoop with parallel matrix computation.
>
> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>
> thanks
>
> - robert
>
--
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org
Re: Hama Problem
Posted by "Edward J. Yoon" <ed...@apache.org>.
Hi,
> I am not sure this code easily can be parallel computed,or how to change
> this code to add the parallel compuation.any advice will be
> appreciated.thanks in advance.
OK, I'm sure it could be run on Hama/Hadoop.
According to my understanding of your code, It's a PCA. If you have an
M images as represented N^2 * 1 vectors, vector will be nomalized by
step-2. In step-3, the covariance matrix will be constructed using
matrix A by deviation of images and A^T. Finally you'll get the
eigenfaces using eigen values/vectors.
There are two large computing component in the step-3 and step-4, such
as N^2 by N^2 matrix transpose, multiplication and eigen
values/vectors of M by M matrix.
Currently in Hama, multiplication and transpose were implemented. If
eigenvalue decomposition is implement, you can easily migrate code.
> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html
Yes, we have own mailing list. Please contact us.
Thanks,
Edward
On Wed, May 20, 2009 at 4:51 PM, Robert Burrell Donkin
<ro...@gmail.com> wrote:
> On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>>
>> Hello,everyone
>
> hi
>
>> I am new to hama. in our project ,my team leader let me upload old
>> code, run it on hadoop with parallel matrix computation.
>
> hama has it's own mailing list and this question is probably better
> asked there. see http://incubator.apache.org/hama/mailing_lists.html
>
> thanks
>
> - robert
>
--
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org
Re: Hama Problem
Posted by Robert Burrell Donkin <ro...@gmail.com>.
On Wed, May 20, 2009 at 8:08 AM, ykj <yk...@163.com> wrote:
>
> Hello,everyone
hi
> I am new to hama. in our project ,my team leader let me upload old
> code, run it on hadoop with parallel matrix computation.
hama has it's own mailing list and this question is probably better
asked there. see http://incubator.apache.org/hama/mailing_lists.html
thanks
- robert
Re: Hama Problem
Posted by ykj <yk...@163.com>.
I work on this project. The team leader assigned me this task, so I have no choice. I
think the reason why we didn't choose MPI matrix parallel computation is
that Hadoop is more popular now than before. Can you give me some advice
about my question? It is strange that I subscribed to this mailing list, but I
cannot receive any
mail. I already responded to hama-user-help's mail.
--
View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23699938.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.
Re: Hama Problem
Posted by "Edward J. Yoon" <ed...@apache.org>.
Hi,
Before consider this, let's talk about your problem and why do you
want to use these. If your application isn't huge then I think
MPI-based matrix package could be much helpful to you since Hama
concept also is the large-scale, not high performance for small
matrices.
And, Have you tried to subscribe/mail here:
http://incubator.apache.org/hama/mailing_lists.html
On Fri, May 22, 2009 at 4:51 PM, ykj <yk...@163.com> wrote:
>
>
> Currently in Hama, eigenvalue decomposition is not implement.So In STEP 4,
> it is hard to migrate it.so I
>
> work out an idea to bypass it. before Step 4, I can let L be
> denseMatrix.when I come to Step 4, I can
> transform L into submatrix.in Jama,eigenvalue decomposition is support
> although it is not parallel computing.So I can get eigValues ,eigVectors
> values.But after that in step 5,It need to sort two matrix.
>
> I want to use the hbase sort function.so Hwo can transform this two
> submatrix into two densematrix?
>
> or other way ?
> /**
> * STEP 4
> * Calculate the eigen values and vectors of this covariance matrix
> *
> * % Get the eigenvectors (columns of Vectors) and eigenvalues (diag of
> Values)
> */
> EigenvalueDecomposition eigen = L.eig();
> eigValues = eigen.getD();
> eigVectors = eigen.getV();
>
>
> /**
> * STEP 5
> * % Sort the vectors/values according to size of eigenvalue
> */
> Matrix[] eigDVSorted = sortem(eigValues, eigVectors);
> eigValues = eigDVSorted[0];
> eigVectors = eigDVSorted[1];
>
>
> /**
> * STEP 6
> * % Convert the eigenvectors of A'*A into eigenvectors of A*A'
> */
>
> eigVectors = A.times(eigVectors);
>
>
> /**
> * STEP 7
> * % Get the eigenvalues out of the diagonal matrix and
> * % normalize them so the evalues are specifically for cov(A'), not
> A*A'.
> */
> double[] values = diag(eigValues);
> for(int i = 0; i < values.length; i++)
> values[i] /= A.getColumnDimension() - 1;
>
>
> /**
> * STEP 8
> * % Normalize Vectors to unit length, kill vectors corr. to tiny
> evalues
> */
> numEigenVecs = 0;
> for(int i = 0; i < eigVectors.getColumnDimension(); i++) {
> Matrix tmp;
> if (values[i] < 0.0001)
> {
> tmp = new Matrix(eigVectors.getRowDimension(),1);
> }
> else
> {
> tmp = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1,i,i).times(
> 1 / eigVectors.getMatrix(0, eigVectors.getRowDimension() - 1, i,
> i).normF());
> numEigenVecs++;
> }
> eigVectors.setMatrix(0,eigVectors.getRowDimension()-1,i,i,tmp);
> //eigVectors.timesEquals(1 / eigVectors.getMatrix(0,
> eigVectors.getRowDimension() - 1, i, i).normInf());
> }
> eigVectors = eigVectors.getMatrix(0,eigVectors.getRowDimension() - 1, 0,
> numEigenVecs - 1);
>
> trained = true;
>
>
> /*System.out.println("There are " + numGood + "
> eigenVectors\n\nEigenVectorSize");
> System.out.println(eigVectors.getRowDimension());
> System.out.println(eigVectors.getColumnDimension());
> try {
> PrintWriter pw = new PrintWriter("c:\\tmp\\test.txt");
> eigVectors.print(pw, 8, 4);
> pw.flush();
> pw.close();
> } catch (Exception e) {
> e.printStackTrace();
> }
>
> int width = pics[0].img.getWidth(null);
> BufferedImage biAvg = imageFromMatrix(bigAvg.getArrayCopy()[0], width);
>
> try {
> saveImage(new File("c:\\tmp\\test.jpg"), biAvg);
> } catch (IOException e1) {
> e1.printStackTrace();
> }*/
> }
>
> /**
> * Returns a number of eigenFace values to be used in a feature space
> * @param pic
> * @param number number of eigen feature values.
> * @return will be of length number or this.getNumEigenVecs whichever is
> the smaller
> */
> public double[] getEigenFaces(Picture pic, int number)
> {
> if (number > numEigenVecs) //adjust the number to the maxium number of
> eigen vectors availiable
> number = numEigenVecs;
>
> double[] ret = new double[number];
>
> double[] pixels = pic.getImagePixels();
> Matrix face = new Matrix(pixels, pixels.length);
> Matrix Vecs = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1, 0,
> number-1).transpose();
>
> Matrix rslt = Vecs.times(face);
>
> for (int i=0; i<number; i++)
> {
> ret[i] = rslt.get(i,0);
> }
>
> return ret;
> }
>
>
>
> /**
> * Gets the diagonal of a matrix
> * @param M matrix
> * @return
> */
> private double[] diag(Matrix M) {
> double[] dvec = new double[M.getColumnDimension()];
> for(int i = 0; i < M.getColumnDimension(); i++)
> dvec[i] = M.get(i, i);
> return dvec;
>
> }
> /**
> * Sorts the Eigenvalues and vectors in decending order
> *
> * @param D = eigen Values
> * @param V = eigen Vectors
> * @return
> */
> private Matrix[] sortem(Matrix D, Matrix V) {
> //dvec = diag(D); // get diagonal components
> double[] dvec = diag(D);
>
> //NV = zeros(size(V));
>
>
> //[dvec,index_dv] = sort(dvec); // sort dvec, maintain index in
> index_dv
>
> class di_pair{ double value; int index; };
> di_pair[] dvec_indexed = new di_pair[dvec.length];
> for(int i = 0; i < dvec_indexed.length; i++) {
> dvec_indexed[i] = new di_pair();
> dvec_indexed[i].index = i;
> dvec_indexed[i].value = dvec[i];
> }
>
> Comparator di_pair_sort = new Comparator() {
> public int compare(Object arg0, Object arg1) {
> di_pair lt = (di_pair)arg0;
> di_pair rt = (di_pair)arg1;
> double dif = (lt.value - rt.value);
> if(dif > 0) return -1;
> if(dif < 0) return 1;
> else return 0;
> }
> };
> Arrays.sort(dvec_indexed, di_pair_sort);
>
> //index_dv = flipud(index_dv);
> //for i = 1:size(D,1)
> // ND(i,i) = D(index_dv(i),index_dv(i));
> // NV(:,i) = V(:,index_dv(i));
> //end;
>
> Matrix D2 = new Matrix(D.getRowDimension(), D.getColumnDimension());
> Matrix V2 = new Matrix(V.getRowDimension(), V.getColumnDimension());
>
> for(int i = 0; i < dvec_indexed.length; i++) {
> D2.set(i, i, D.get(dvec_indexed[i].index, dvec_indexed[i].index));
> int height = V.getRowDimension() - 1;
> Matrix tmp =
> V.getMatrix(dvec_indexed[i].index,dvec_indexed[i].index,0,height);
> V2.setMatrix(i, i,0,height, tmp);
> }
> //TODO : Not sure why, but this has to be flipped - check this out
> maybe?
> Matrix V3 = new Matrix(V.getRowDimension(), V.getColumnDimension());
> for (int i=0; i<V3.getRowDimension(); i++)
> {
> for (int j=0; j< V3.getColumnDimension(); j++)
> {
> V3.set(i,j,V2.get(V3.getRowDimension() - i - 1,
> V3.getColumnDimension() - j - 1));
> }
> }
>
> return new Matrix[] { D2, V3 };
> }
>
>
> public boolean isTrained() {
> return trained;
> }
> public int getNumEigenVecs() {
> return numEigenVecs;
> }
> }
>
>
> any advice will be appreciated.thanks in advance.
>
>
> --
> View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23666077.html
> Sent from the Hadoop core-user mailing list archive at Nabble.com.
>
>
--
Best Regards, Edward J. Yoon @ NHN, corp.
edwardyoon@apache.org
http://blog.udanax.org
Re: Hama Problem
Posted by ykj <yk...@163.com>.
Currently in Hama, eigenvalue decomposition is not implement.So In STEP 4,
it is hard to migrate it.so I
work out an idea to bypass it. before Step 4, I can let L be
denseMatrix.when I come to Step 4, I can
transform L into submatrix.in Jama,eigenvalue decomposition is support
although it is not parallel computing.So I can get eigValues ,eigVectors
values.But after that in step 5,It need to sort two matrix.
I want to use the hbase sort function.so Hwo can transform this two
submatrix into two densematrix?
or other way ?
/**
* STEP 4
* Calculate the eigen values and vectors of this covariance matrix
*
* % Get the eigenvectors (columns of Vectors) and eigenvalues (diag of
Values)
*/
EigenvalueDecomposition eigen = L.eig();
eigValues = eigen.getD();
eigVectors = eigen.getV();
/**
* STEP 5
* % Sort the vectors/values according to size of eigenvalue
*/
Matrix[] eigDVSorted = sortem(eigValues, eigVectors);
eigValues = eigDVSorted[0];
eigVectors = eigDVSorted[1];
/**
* STEP 6
* % Convert the eigenvectors of A'*A into eigenvectors of A*A'
*/
eigVectors = A.times(eigVectors);
/**
* STEP 7
* % Get the eigenvalues out of the diagonal matrix and
* % normalize them so the evalues are specifically for cov(A'), not
A*A'.
*/
double[] values = diag(eigValues);
for(int i = 0; i < values.length; i++)
values[i] /= A.getColumnDimension() - 1;
/**
* STEP 8
* % Normalize Vectors to unit length, kill vectors corr. to tiny
evalues
*/
numEigenVecs = 0;
for(int i = 0; i < eigVectors.getColumnDimension(); i++) {
Matrix tmp;
if (values[i] < 0.0001)
{
tmp = new Matrix(eigVectors.getRowDimension(),1);
}
else
{
tmp = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1,i,i).times(
1 / eigVectors.getMatrix(0, eigVectors.getRowDimension() - 1, i,
i).normF());
numEigenVecs++;
}
eigVectors.setMatrix(0,eigVectors.getRowDimension()-1,i,i,tmp);
//eigVectors.timesEquals(1 / eigVectors.getMatrix(0,
eigVectors.getRowDimension() - 1, i, i).normInf());
}
eigVectors = eigVectors.getMatrix(0,eigVectors.getRowDimension() - 1, 0,
numEigenVecs - 1);
trained = true;
/*System.out.println("There are " + numGood + "
eigenVectors\n\nEigenVectorSize");
System.out.println(eigVectors.getRowDimension());
System.out.println(eigVectors.getColumnDimension());
try {
PrintWriter pw = new PrintWriter("c:\\tmp\\test.txt");
eigVectors.print(pw, 8, 4);
pw.flush();
pw.close();
} catch (Exception e) {
e.printStackTrace();
}
int width = pics[0].img.getWidth(null);
BufferedImage biAvg = imageFromMatrix(bigAvg.getArrayCopy()[0], width);
try {
saveImage(new File("c:\\tmp\\test.jpg"), biAvg);
} catch (IOException e1) {
e1.printStackTrace();
}*/
}
/**
* Returns a number of eigenFace values to be used in a feature space
* @param pic
* @param number number of eigen feature values.
* @return will be of length number or this.getNumEigenVecs whichever is
the smaller
*/
public double[] getEigenFaces(Picture pic, int number)
{
if (number > numEigenVecs) //adjust the number to the maxium number of
eigen vectors availiable
number = numEigenVecs;
double[] ret = new double[number];
double[] pixels = pic.getImagePixels();
Matrix face = new Matrix(pixels, pixels.length);
Matrix Vecs = eigVectors.getMatrix(0,eigVectors.getRowDimension()-1, 0,
number-1).transpose();
Matrix rslt = Vecs.times(face);
for (int i=0; i<number; i++)
{
ret[i] = rslt.get(i,0);
}
return ret;
}
/**
* Gets the diagonal of a matrix
* @param M matrix
* @return
*/
private double[] diag(Matrix M) {
double[] dvec = new double[M.getColumnDimension()];
for(int i = 0; i < M.getColumnDimension(); i++)
dvec[i] = M.get(i, i);
return dvec;
}
/**
* Sorts the Eigenvalues and vectors in decending order
*
* @param D = eigen Values
* @param V = eigen Vectors
* @return
*/
private Matrix[] sortem(Matrix D, Matrix V) {
//dvec = diag(D); // get diagonal components
double[] dvec = diag(D);
//NV = zeros(size(V));
//[dvec,index_dv] = sort(dvec); // sort dvec, maintain index in
index_dv
class di_pair{ double value; int index; };
di_pair[] dvec_indexed = new di_pair[dvec.length];
for(int i = 0; i < dvec_indexed.length; i++) {
dvec_indexed[i] = new di_pair();
dvec_indexed[i].index = i;
dvec_indexed[i].value = dvec[i];
}
Comparator di_pair_sort = new Comparator() {
public int compare(Object arg0, Object arg1) {
di_pair lt = (di_pair)arg0;
di_pair rt = (di_pair)arg1;
double dif = (lt.value - rt.value);
if(dif > 0) return -1;
if(dif < 0) return 1;
else return 0;
}
};
Arrays.sort(dvec_indexed, di_pair_sort);
//index_dv = flipud(index_dv);
//for i = 1:size(D,1)
// ND(i,i) = D(index_dv(i),index_dv(i));
// NV(:,i) = V(:,index_dv(i));
//end;
Matrix D2 = new Matrix(D.getRowDimension(), D.getColumnDimension());
Matrix V2 = new Matrix(V.getRowDimension(), V.getColumnDimension());
for(int i = 0; i < dvec_indexed.length; i++) {
D2.set(i, i, D.get(dvec_indexed[i].index, dvec_indexed[i].index));
int height = V.getRowDimension() - 1;
Matrix tmp =
V.getMatrix(dvec_indexed[i].index,dvec_indexed[i].index,0,height);
V2.setMatrix(i, i,0,height, tmp);
}
//TODO : Not sure why, but this has to be flipped - check this out
maybe?
Matrix V3 = new Matrix(V.getRowDimension(), V.getColumnDimension());
for (int i=0; i<V3.getRowDimension(); i++)
{
for (int j=0; j< V3.getColumnDimension(); j++)
{
V3.set(i,j,V2.get(V3.getRowDimension() - i - 1,
V3.getColumnDimension() - j - 1));
}
}
return new Matrix[] { D2, V3 };
}
public boolean isTrained() {
return trained;
}
public int getNumEigenVecs() {
return numEigenVecs;
}
}
any advice will be appreciated.thanks in advance.
--
View this message in context: http://www.nabble.com/Hama--Problem-tp23630187p23666077.html
Sent from the Hadoop core-user mailing list archive at Nabble.com.