You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2012/10/12 01:20:08 UTC

svn commit: r1397365 - in /mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop: MatrixColumnMeansJob.java stochasticsvd/BtJob.java stochasticsvd/SSVDCli.java stochasticsvd/SSVDSolver.java

Author: dlyubimov
Date: Thu Oct 11 23:20:07 2012
New Revision: 1397365

URL: http://svn.apache.org/viewvc?rev=1397365&view=rev
Log:
MAHOUT-1098: housekeeping fixes in MatrixColumnMeansJob; honor the -ow (overwrite) flag when running in --pca true mode.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java Thu Oct 11 23:20:07 2012
@@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -56,7 +56,7 @@ public final class MatrixColumnMeansJob 
   public static Vector run(Configuration conf,
                            Path inputPath,
                            Path outputVectorTmpPath) throws IOException {
-    return run(conf, inputPath, outputVectorTmpPath, VECTOR_CLASS);
+    return run(conf, inputPath, outputVectorTmpPath, null);
   }
 
   /**
@@ -82,12 +82,11 @@ public final class MatrixColumnMeansJob 
                       vectorClass == null ? DenseVector.class.getName()
                           : vectorClass);
 
-      @SuppressWarnings("deprecation")
-      JobConf oldApiConf = new JobConf(initialConf);
+      Job job = new Job(initialConf, "MatrixColumnMeansJob");
+      job.setJarByClass(MatrixColumnMeansJob.class);
 
-      org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(oldApiConf,
-                                                              outputVectorTmpPath);
-      Job job = new Job(initialConf);
+      FileOutputFormat.setOutputPath(job, outputVectorTmpPath);
+      
       outputVectorTmpPath.getFileSystem(job.getConfiguration())
                          .delete(outputVectorTmpPath, true);
       job.setNumReduceTasks(1);
@@ -108,7 +107,7 @@ public final class MatrixColumnMeansJob 
 
       Path tmpFile = new Path(outputVectorTmpPath, "part-r-00000");
       SequenceFileValueIterator<VectorWritable> iterator =
-        new SequenceFileValueIterator<VectorWritable>(tmpFile, true, oldApiConf);
+        new SequenceFileValueIterator<VectorWritable>(tmpFile, true, initialConf);
       try {
         if (iterator.hasNext()) {
           return iterator.next().get();
@@ -132,7 +131,7 @@ public final class MatrixColumnMeansJob 
    * Mapper for calculation of column-wise mean.
    */
   public static class MatrixColumnMeansMapper extends
-      Mapper<IntWritable, VectorWritable, NullWritable, VectorWritable> {
+      Mapper<Writable, VectorWritable, NullWritable, VectorWritable> {
 
     private Vector runningSum;
     private String vectorClass;
@@ -149,7 +148,7 @@ public final class MatrixColumnMeansJob 
      * column-wise running sum. Nothing is written at this stage
      */
     @Override
-    public void map(IntWritable r, VectorWritable v, Context context)
+    public void map(Writable r, VectorWritable v, Context context)
       throws IOException {
       if (runningSum == null) {
           /*

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java Thu Oct 11 23:20:07 2012
@@ -384,6 +384,10 @@ public final class BtJob {
       String xiPathStr = conf.get(PROP_XI_PATH);
       if (xiPathStr != null) {
         xi = SSVDHelper.loadAndSumUpVectors(new Path(xiPathStr), conf);
+        if (xi == null) {
+          throw new IOException(String.format("unable to load mean path xi from %s.",
+                                              xiPathStr));
+        }
       }
 
       if (outputBBt || xi != null) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java Thu Oct 11 23:20:07 2012
@@ -117,15 +117,26 @@ public class SSVDCli extends AbstractJob
     }
 
     Path[] inputPaths = { getInputPath() };
+    Path tempPath = getTempPath();
+    FileSystem fs = FileSystem.get(getOutputPath().toUri(), conf);
 
     // MAHOUT-817
     if (pca && xiPath == null) {
-      xiPath = new Path(getTempPath(), "xi");
-      MatrixColumnMeansJob.run(conf, inputPaths[0], getTempPath());
+      xiPath = new Path(tempPath, "xi");
+      if (overwrite) {
+        fs.delete(xiPath, true);
+      }
+      MatrixColumnMeansJob.run(conf, inputPaths[0], xiPath);
     }
 
     SSVDSolver solver =
-      new SSVDSolver(conf, inputPaths, getTempPath(), r, k, p, reduceTasks);
+      new SSVDSolver(conf,
+                     inputPaths,
+                     new Path(tempPath, "ssvd"),
+                     r,
+                     k,
+                     p,
+                     reduceTasks);
 
     solver.setMinSplitSize(minSplitSize);
     solver.setComputeU(computeU);
@@ -138,13 +149,14 @@ public class SSVDCli extends AbstractJob
     solver.setQ(q);
     solver.setBroadcast(broadcast);
     solver.setOverwrite(overwrite);
-    solver.setPcaMeanPath(xiPath);
+
+    if (xiPath != null) {
+      solver.setPcaMeanPath(new Path(xiPath, "part-*"));
+    }
 
     solver.run();
 
     // housekeeping
-    FileSystem fs = FileSystem.get(getOutputPath().toUri(), conf);
-
     if (overwrite) {
       fs.delete(getOutputPath(), true);
     }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java Thu Oct 11 23:20:07 2012
@@ -406,6 +406,11 @@ public final class SSVDSolver {
          */
 
         Vector xi = SSVDHelper.loadAndSumUpVectors(pcaMeanPath, conf);
+        if (xi == null) {
+          throw new IOException(String.format("unable to load mean path xi from %s.",
+                                              pcaMeanPath.toString()));
+        }
+
         xisquaredlen = xi.dot(xi);
         Omega omega = new Omega(seed, k + p);
         Vector s_b0 = omega.mutlithreadedTRightMultiply(xi);