You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by dl...@apache.org on 2012/10/12 01:20:08 UTC
svn commit: r1397365 - in
/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop:
MatrixColumnMeansJob.java stochasticsvd/BtJob.java
stochasticsvd/SSVDCli.java stochasticsvd/SSVDSolver.java
Author: dlyubimov
Date: Thu Oct 11 23:20:07 2012
New Revision: 1397365
URL: http://svn.apache.org/viewvc?rev=1397365&view=rev
Log:
MAHOUT-1098: housekeeping fixes in MatrixColumnMeansJob; honor the -ow (overwrite) flag in --pca true mode.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java Thu Oct 11 23:20:07 2012
@@ -23,7 +23,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
@@ -56,7 +56,7 @@ public final class MatrixColumnMeansJob
public static Vector run(Configuration conf,
Path inputPath,
Path outputVectorTmpPath) throws IOException {
- return run(conf, inputPath, outputVectorTmpPath, VECTOR_CLASS);
+ return run(conf, inputPath, outputVectorTmpPath, null);
}
/**
@@ -82,12 +82,11 @@ public final class MatrixColumnMeansJob
vectorClass == null ? DenseVector.class.getName()
: vectorClass);
- @SuppressWarnings("deprecation")
- JobConf oldApiConf = new JobConf(initialConf);
+ Job job = new Job(initialConf, "MatrixColumnMeansJob");
+ job.setJarByClass(MatrixColumnMeansJob.class);
- org.apache.hadoop.mapred.FileOutputFormat.setOutputPath(oldApiConf,
- outputVectorTmpPath);
- Job job = new Job(initialConf);
+ FileOutputFormat.setOutputPath(job, outputVectorTmpPath);
+
outputVectorTmpPath.getFileSystem(job.getConfiguration())
.delete(outputVectorTmpPath, true);
job.setNumReduceTasks(1);
@@ -108,7 +107,7 @@ public final class MatrixColumnMeansJob
Path tmpFile = new Path(outputVectorTmpPath, "part-r-00000");
SequenceFileValueIterator<VectorWritable> iterator =
- new SequenceFileValueIterator<VectorWritable>(tmpFile, true, oldApiConf);
+ new SequenceFileValueIterator<VectorWritable>(tmpFile, true, initialConf);
try {
if (iterator.hasNext()) {
return iterator.next().get();
@@ -132,7 +131,7 @@ public final class MatrixColumnMeansJob
* Mapper for calculation of column-wise mean.
*/
public static class MatrixColumnMeansMapper extends
- Mapper<IntWritable, VectorWritable, NullWritable, VectorWritable> {
+ Mapper<Writable, VectorWritable, NullWritable, VectorWritable> {
private Vector runningSum;
private String vectorClass;
@@ -149,7 +148,7 @@ public final class MatrixColumnMeansJob
* column-wise running sum. Nothing is written at this stage
*/
@Override
- public void map(IntWritable r, VectorWritable v, Context context)
+ public void map(Writable r, VectorWritable v, Context context)
throws IOException {
if (runningSum == null) {
/*
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java Thu Oct 11 23:20:07 2012
@@ -384,6 +384,10 @@ public final class BtJob {
String xiPathStr = conf.get(PROP_XI_PATH);
if (xiPathStr != null) {
xi = SSVDHelper.loadAndSumUpVectors(new Path(xiPathStr), conf);
+ if (xi == null) {
+ throw new IOException(String.format("unable to load mean path xi from %s.",
+ xiPathStr));
+ }
}
if (outputBBt || xi != null) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java Thu Oct 11 23:20:07 2012
@@ -117,15 +117,26 @@ public class SSVDCli extends AbstractJob
}
Path[] inputPaths = { getInputPath() };
+ Path tempPath = getTempPath();
+ FileSystem fs = FileSystem.get(getOutputPath().toUri(), conf);
// MAHOUT-817
if (pca && xiPath == null) {
- xiPath = new Path(getTempPath(), "xi");
- MatrixColumnMeansJob.run(conf, inputPaths[0], getTempPath());
+ xiPath = new Path(tempPath, "xi");
+ if (overwrite) {
+ fs.delete(xiPath, true);
+ }
+ MatrixColumnMeansJob.run(conf, inputPaths[0], xiPath);
}
SSVDSolver solver =
- new SSVDSolver(conf, inputPaths, getTempPath(), r, k, p, reduceTasks);
+ new SSVDSolver(conf,
+ inputPaths,
+ new Path(tempPath, "ssvd"),
+ r,
+ k,
+ p,
+ reduceTasks);
solver.setMinSplitSize(minSplitSize);
solver.setComputeU(computeU);
@@ -138,13 +149,14 @@ public class SSVDCli extends AbstractJob
solver.setQ(q);
solver.setBroadcast(broadcast);
solver.setOverwrite(overwrite);
- solver.setPcaMeanPath(xiPath);
+
+ if (xiPath != null) {
+ solver.setPcaMeanPath(new Path(xiPath, "part-*"));
+ }
solver.run();
// housekeeping
- FileSystem fs = FileSystem.get(getOutputPath().toUri(), conf);
-
if (overwrite) {
fs.delete(getOutputPath(), true);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1397365&r1=1397364&r2=1397365&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java Thu Oct 11 23:20:07 2012
@@ -406,6 +406,11 @@ public final class SSVDSolver {
*/
Vector xi = SSVDHelper.loadAndSumUpVectors(pcaMeanPath, conf);
+ if (xi == null) {
+ throw new IOException(String.format("unable to load mean path xi from %s.",
+ pcaMeanPath.toString()));
+ }
+
xisquaredlen = xi.dot(xi);
Omega omega = new Omega(seed, k + p);
Vector s_b0 = omega.mutlithreadedTRightMultiply(xi);