You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2011/05/02 20:14:25 UTC
svn commit: r1098706 [3/5] - in /mahout/trunk: core/src/main/java/
core/src/main/java/org/apache/mahout/
core/src/main/java/org/apache/mahout/cf/taste/eval/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/t...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansMapper.java Mon May 2 18:14:18 2011
@@ -54,7 +54,7 @@ public class FuzzyKMeansMapper extends M
log.info("In Mapper Configure:");
String clusterPath = job.get(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY);
- if ((clusterPath != null) && (clusterPath.length() > 0)) {
+ if (clusterPath != null && clusterPath.length() > 0) {
FuzzyKMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansReducer.java Mon May 2 18:14:18 2011
@@ -63,7 +63,7 @@ public class FuzzyKMeansReducer extends
Collection<SoftCluster> clusters = new ArrayList<SoftCluster>();
String clusterPath = conf.get(FuzzyKMeansConfigKeys.CLUSTER_PATH_KEY);
- if ((clusterPath != null) && (clusterPath.length() > 0)) {
+ if (clusterPath != null && clusterPath.length() > 0) {
FuzzyKMeansUtil.configureWithClusterInfo(new Path(clusterPath), clusters);
setClusterMap(clusters);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Mon May 2 18:14:18 2011
@@ -54,7 +54,7 @@ final class FuzzyKMeansUtil {
result.add(fs.makeQualified(match.getPath()));
}
- // iterate thru the result path list
+ // iterate through the result path list
for (Path path : result) {
for (Writable value : new SequenceFileValueIterable<Writable>(path, conf)) {
Class<? extends Writable> valueClass = value.getClass();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterMapper.java Mon May 2 18:14:18 2011
@@ -60,7 +60,7 @@ public class KMeansClusterMapper
measure.configure(conf);
String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
- if ((clusterPath != null) && (clusterPath.length() > 0)) {
+ if (clusterPath != null && clusterPath.length() > 0) {
KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath), clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("No clusters found. Check your -c path.");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansClusterer.java Mon May 2 18:14:18 2011
@@ -75,7 +75,7 @@ public class KMeansClusterer {
if (KMeansClusterer.log.isDebugEnabled()) {
log.debug("{} Cluster: {}", distance, cluster.getId());
}
- if ((distance < nearestDistance) || (nearestCluster == null)) {
+ if (distance < nearestDistance || nearestCluster == null) {
nearestCluster = cluster;
nearestDistance = distance;
}
@@ -124,7 +124,7 @@ public class KMeansClusterer {
for (AbstractCluster cluster : clusters) {
Vector clusterCenter = cluster.getCenter();
double distance = measure.distance(clusterCenter.getLengthSquared(), clusterCenter, vector);
- if ((distance < nearestDistance) || (nearestCluster == null)) {
+ if (distance < nearestDistance || nearestCluster == null) {
nearestCluster = cluster;
nearestDistance = distance;
}
@@ -151,7 +151,7 @@ public class KMeansClusterer {
if (log.isDebugEnabled()) {
log.debug("{} Cluster: {}", distance, cluster.getId());
}
- if ((distance < nearestDistance) || (nearestCluster == null)) {
+ if (distance < nearestDistance || nearestCluster == null) {
nearestCluster = cluster;
nearestDistance = distance;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Mon May 2 18:14:18 2011
@@ -303,7 +303,7 @@ public class KMeansDriver extends Abstra
boolean converged = false;
int iteration = 1;
- while (!converged && (iteration <= maxIterations)) {
+ while (!converged && iteration <= maxIterations) {
log.info("K-Means Iteration {}", iteration);
// point the output to a new directory per iteration
Path clustersOut = new Path(output, AbstractCluster.CLUSTERS_DIR + iteration);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansMapper.java Mon May 2 18:14:18 2011
@@ -54,7 +54,7 @@ public class KMeansMapper extends Mapper
this.clusterer = new KMeansClusterer(measure);
String clusterPath = conf.get(KMeansConfigKeys.CLUSTER_PATH_KEY);
- if ((clusterPath != null) && (clusterPath.length() > 0)) {
+ if (clusterPath != null && clusterPath.length() > 0) {
KMeansUtil.configureWithClusterInfo(conf, new Path(clusterPath), clusters);
if (clusters.isEmpty()) {
throw new IllegalStateException("No clusters found. Check your -c path.");
@@ -74,7 +74,6 @@ public class KMeansMapper extends Mapper
*
* @param clusters
* a List<Cluster>
- * @param measure TODO
*/
void setup(Collection<Cluster> clusters, DistanceMeasure measure) {
this.clusters.clear();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Mon May 2 18:14:18 2011
@@ -54,7 +54,7 @@ final class KMeansUtil {
result.add(fs.makeQualified(match.getPath()));
}
- // iterate thru the result path list
+ // iterate through the result path list
for (Path path : result) {
for (Writable value : new SequenceFileValueIterable<Writable>(path, conf)) {
Class<? extends Writable> valueClass = value.getClass();
Added: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,5 @@
+/**
+ * This package provides an implementation of the <a href="http://en.wikipedia.org/wiki/Kmeans">k-means</a> clustering
+ * algorithm.
+ */
+package org.apache.mahout.clustering.kmeans;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java Mon May 2 18:14:18 2011
@@ -163,7 +163,7 @@ public final class LDADriver extends Abs
double oldLL = Double.NEGATIVE_INFINITY;
boolean converged = false;
- for (int iteration = 1; ((maxIterations < 1) || (iteration <= maxIterations)) && !converged; iteration++) {
+ for (int iteration = 1; (maxIterations < 1 || iteration <= maxIterations) && !converged; iteration++) {
log.info("LDA Iteration {}", iteration);
// point the output to a new directory per iteration
Path stateOut = new Path(output, "state-" + iteration);
@@ -175,7 +175,7 @@ public final class LDADriver extends Abs
log.info("(Old LL: {})", oldLL);
log.info("(Rel Change: {})", relChange);
- converged = (iteration > 3) && (relChange < OVERALL_CONVERGENCE);
+ converged = iteration > 3 && relChange < OVERALL_CONVERGENCE;
stateIn = stateOut;
oldLL = ll;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAInference.java Mon May 2 18:14:18 2011
@@ -99,8 +99,8 @@ public class LDAInference {
int iteration = 0;
boolean converged = false;
- double oldLL = 1;
- while (!converged && (iteration < MAX_ITER)) {
+ double oldLL = 1.0;
+ while (!converged && iteration < MAX_ITER) {
nextGamma.assign(state.getTopicSmoothing()); // nG := alpha, for all topics
int mapping = 0;
@@ -129,7 +129,7 @@ public class LDAInference {
double ll = computeLikelihood(wordCounts, map, phi, gamma, digammaGamma);
// isNotNaNAssertion(ll);
- converged = (oldLL < 0) && ((oldLL - ll) / oldLL < E_STEP_CONVERGENCE);
+ converged = oldLL < 0.0 && (oldLL - ll) / oldLL < E_STEP_CONVERGENCE;
oldLL = ll;
iteration++;
@@ -256,26 +256,5 @@ public class LDAInference {
+ f * (-1.0 / 132.0 + f * (691.0 / 32760.0 + f * (-1.0 / 12.0 + f * 3617.0 / 8160.0)))))));
return r + Math.log(x) - 0.5 / x + t;
}
-
- /*
- private void assertions(int word, Vector digammaGamma, double phiTotal, int k) {
- assert !Double.isNaN(phiTotal);
- assert !Double.isNaN(state.logProbWordGivenTopic(word, k));
- assert !Double.isInfinite(state.logProbWordGivenTopic(word, k));
- assert !Double.isNaN(digammaGamma.getQuick(k));
- }
-
- private void likelihoodAssertion(int w, int k, double llPart) {
- assert state.logProbWordGivenTopic(w, k) < 0;
- assert !Double.isNaN(llPart);
- }
- private void isLessThanOrEqualsZero(double ll) {
- assert ll <= 0;
- }
-
- private void isNotNaNAssertion(double ll) {
- assert !Double.isNaN(ll) : state.topicSmoothing + " " + state.numTopics;
- }
- */
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDAMapper.java Mon May 2 18:14:18 2011
@@ -78,7 +78,6 @@ public class LDAMapper extends Mapper<Wr
for (int k = 0; k < state.getNumTopics(); ++k) {
IntPairWritable kw = new IntPairWritable(k, LDADriver.TOPIC_SUM_KEY);
v.set(logTotals[k]);
- assert !Double.isNaN(v.get());
context.write(kw, v);
}
IntPairWritable llk = new IntPairWritable(LDADriver.LOG_LIKELIHOOD_KEY, LDADriver.LOG_LIKELIHOOD_KEY);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Mon May 2 18:14:18 2011
@@ -142,7 +142,7 @@ public class MeanShiftCanopy extends Clu
result.setCenter(this.getCenter());
result.setRadius(this.getRadius());
result.setNumPoints(this.getNumPoints());
- result.boundPoints = this.boundPoints;
+ result.setBoundPoints(boundPoints);
return result;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Mon May 2 18:14:18 2011
@@ -96,7 +96,7 @@ public class MeanShiftCanopyClusterer {
if (norm < t1) {
aCanopy.touch(canopy);
}
- if (norm < t2 && ((closestCoveringCanopy == null) || (norm < closestNorm))) {
+ if (norm < t2 && (closestCoveringCanopy == null || norm < closestNorm)) {
closestNorm = norm;
closestCoveringCanopy = canopy;
}
@@ -176,8 +176,7 @@ public class MeanShiftCanopyClusterer {
for (int iter = 0; !converged[0] && iter < numIter; iter++) {
newCanopies = clusterer.iterate(newCanopies, converged);
}
- canopies = newCanopies;
- return canopies;
+ return newCanopies;
}
protected List<MeanShiftCanopy> iterate(Iterable<MeanShiftCanopy> canopies, boolean[] converged) {
@@ -199,7 +198,6 @@ public class MeanShiftCanopyClusterer {
log.info("Duplicate bound point: {} in Canopy: {}", v, canopy.asFormatString(null));
} else {
coveredPoints.add(v);
- //System.out.println("Added bound point: " + v + " to Canopy: " + canopy.asFormatString(null));
}
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Mon May 2 18:14:18 2011
@@ -143,17 +143,17 @@ public class MeanShiftCanopyDriver exten
* true if the input points are to be clustered once the iterations complete
* @param runSequential if true run in sequential execution mode
*/
- public void run(Configuration conf,
- Path input,
- Path output,
- DistanceMeasure measure,
- double t1,
- double t2,
- double convergenceDelta,
- int maxIterations,
- boolean inputIsCanopies,
- boolean runClustering,
- boolean runSequential)
+ public static void run(Configuration conf,
+ Path input,
+ Path output,
+ DistanceMeasure measure,
+ double t1,
+ double t2,
+ double convergenceDelta,
+ int maxIterations,
+ boolean inputIsCanopies,
+ boolean runClustering,
+ boolean runSequential)
throws IOException, InterruptedException, ClassNotFoundException {
Path clustersIn = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
if (inputIsCanopies) {
@@ -260,15 +260,15 @@ public class MeanShiftCanopyDriver exten
* an int number of iterations
* @param runSequential if true run in sequential execution mode
*/
- public Path buildClusters(Configuration conf,
- Path clustersIn,
- Path output,
- DistanceMeasure measure,
- double t1,
- double t2,
- double convergenceDelta,
- int maxIterations,
- boolean runSequential)
+ public static Path buildClusters(Configuration conf,
+ Path clustersIn,
+ Path output,
+ DistanceMeasure measure,
+ double t1,
+ double t2,
+ double convergenceDelta,
+ int maxIterations,
+ boolean runSequential)
throws IOException, InterruptedException, ClassNotFoundException {
if (runSequential) {
return buildClustersSeq(clustersIn, output, measure, t1, t2, convergenceDelta, maxIterations);
@@ -345,7 +345,7 @@ public class MeanShiftCanopyDriver exten
// iterate until the clusters converge
boolean converged = false;
int iteration = 1;
- while (!converged && (iteration <= maxIterations)) {
+ while (!converged && iteration <= maxIterations) {
log.info("Mean Shift Iteration {}", iteration);
// point the output to a new directory per iteration
Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
Added: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,13 @@
+/**
+ * <p>This package provides several clustering algorithm implementations. Clustering usually groups a set of
+ * objects into groups of similar items. The definition of similarity usually is up to you - for text documents,
+ * cosine-distance/-similarity is recommended. Mahout also features other types of distance measure like
+ * Euclidean distance.</p>
+ *
+ * <p>Input of each clustering algorithm is a set of vectors representing your items. For texts in general these are
+ * <a href="http://en.wikipedia.org/wiki/TFIDF">TFIDF</a> or
+ * <a href="http://en.wikipedia.org/wiki/Bag_of_words">Bag of words</a> representations of the documents.</p>
+ *
+ * <p>Output of each clustering algorithm is either a hard or soft assignment of items to clusters.</p>
+ */
+package org.apache.mahout.clustering;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputMapper.java Mon May 2 18:14:18 2011
@@ -32,11 +32,11 @@ import org.slf4j.LoggerFactory;
* <p>Handles reading the files representing the affinity matrix. Since the affinity
* matrix is representative of a graph, each line in all the files should
* take the form:</p>
- *
- * <code>i,j,value</code>
- *
- * <p>where <code>i</code> and <code>j</code> are the <code>i</code>th and
- * <code>j</code> data points in the entire set, and <code>value</code>
+ *
+ * {@code i,j,value}
+ *
+ * <p>where {@code i} and {@code j} are the {@code i}th and
+ * {@code j} data points in the entire set, and {@code value}
* represents some measurement of their relative absolute magnitudes. This
* is, simply, a method for representing a graph textually.
*/
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/UnitVectorizerJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/UnitVectorizerJob.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/UnitVectorizerJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/UnitVectorizerJob.java Mon May 2 18:14:18 2011
@@ -37,8 +37,8 @@ import org.apache.mahout.math.function.F
* <p>Given a DistributedRowMatrix, this job normalizes each row to unit
* vector length. If the input is a matrix U, and the output is a matrix
* W, the job follows:</p>
- *
- * <p><code>v_ij = u_ij / sqrt(sum_j(u_ij * u_ij))</code></p>
+ *
+ * <p>{@code v_ij = u_ij / sqrt(sum_j(u_ij * u_ij))}</p>
*/
public final class UnitVectorizerJob {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VectorMatrixMultiplicationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VectorMatrixMultiplicationJob.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VectorMatrixMultiplicationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/VectorMatrixMultiplicationJob.java Mon May 2 18:14:18 2011
@@ -40,9 +40,9 @@ import org.apache.mahout.math.hadoop.Dis
* <p>This class handles the three-way multiplication of the digonal matrix
* and the Markov transition matrix inherent in the Eigencuts algorithm.
* The equation takes the form:</p>
- *
- * <code>W = D^(1/2) * M * D^(1/2)</code>
- *
+ *
+ * {@code W = D^(1/2) * M * D^(1/2)}
+ *
* <p>Since the diagonal matrix D has only n non-zero elements, it is represented
* as a dense vector in this job, rather than a full n-by-n matrix. This job
* performs the multiplications and returns the new DRM.
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityMapper.java Mon May 2 18:14:18 2011
@@ -64,7 +64,7 @@ public class EigencutsSensitivityMapper
// first, does this particular eigenvector even pass the required threshold?
double eigenvalue = Math.abs(eigenvalues.get(row.get()));
double betak = -Functions.LOGARITHM.apply(2) / Functions.LOGARITHM.apply(eigenvalue);
- if (eigenvalue >= 1.0 || betak <= (epsilon * beta0)) {
+ if (eigenvalue >= 1.0 || betak <= epsilon * beta0) {
// doesn't pass the threshold! quit
return;
}
@@ -124,8 +124,8 @@ public class EigencutsSensitivityMapper
* Functions.LOGARITHM.apply(Functions.POW.apply(eigenvalue, beta0) / 2));
double secondhalf =
- -Functions.POW.apply((evi / Functions.SQRT.apply(diagi)) - (evj / Functions.SQRT.apply(diagj)), 2)
- + (1.0 - eigenvalue) * ((Functions.POW.apply(evi, 2) / diagi) + (Functions.POW.apply(evj, 2) / diagj));
+ -Functions.POW.apply(evi / Functions.SQRT.apply(diagi) - evj / Functions.SQRT.apply(diagj), 2)
+ + (1.0 - eigenvalue) * (Functions.POW.apply(evi, 2) / diagi + Functions.POW.apply(evj, 2) / diagj);
return firsthalf * secondhalf;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Mon May 2 18:14:18 2011
@@ -102,7 +102,7 @@ public abstract class AbstractJob extend
/** Returns the input path established by a call to {@link #parseArguments(String[])}.
* The source of the path may be an input option added using {@link #addInputOption()}
- * or it may be the value of the <code>mapred.input.dir</code> configuration
+ * or it may be the value of the {@code mapred.input.dir} configuration
* property.
*/
protected Path getInputPath() {
@@ -111,7 +111,7 @@ public abstract class AbstractJob extend
/** Returns the output path established by a call to {@link #parseArguments(String[])}.
* The source of the path may be an output option added using {@link #addOutputOption()}
- * or it may be the value of the <code>mapred.input.dir</code> configuration
+ * or it may be the value of the {@code mapred.output.dir} configuration
* property.
*/
protected Path getOutputPath() {
@@ -119,8 +119,7 @@ public abstract class AbstractJob extend
}
/** Add an option with no argument whose presence can be checked for using
- * <code>containsKey<code> method on the map returned by
- * {@link #parseArguments(String[])};
+ * the {@code containsKey} method on the map returned by {@link #parseArguments(String[])}.
*/
protected void addFlag(String name, String shortName, String description) {
options.add(buildOption(name, shortName, description, false, false, null));
@@ -159,8 +158,8 @@ public abstract class AbstractJob extend
/** Add an arbitrary option to the set of options this job will parse when
* {@link #parseArguments(String[])} is called. If this option has no
- * argument, use <code>containsKey</code> on the map returned by
- * <code>parseArguments</code> to check for its presence. Otherwise, the
+ * argument, use {@code containsKey} on the map returned by
+ * {@code parseArguments} to check for its presence. Otherwise, the
* string value of the option will be placed in the map using a key
* equal to this options long name preceded by '--'.
* @return the option added.
@@ -227,19 +226,19 @@ public abstract class AbstractJob extend
}
/** Parse the arguments specified based on the options defined using the
- * various <code>addOption</code> methods. If -h is specified or an
+ * various {@code addOption} methods. If -h is specified or an
* exception is encountered print help and return null. Has the
* side effect of setting inputPath and outputPath
- * if <code>addInputOption</code> or <code>addOutputOption</code>
- * or <code>mapred.input.dir</code> or <code>mapred.output.dir</code>
+ * if {@code addInputOption} or {@code addOutputOption}
+ * or {@code mapred.input.dir} or {@code mapred.output.dir}
* are present in the Configuration.
- *
- * @return a Map<String,Sting> containing options and their argument values.
- * The presence of a flag can be tested using <code>containsKey</code>, while
- * argument values can be retrieved using <code>get(optionName)</code>. The
+ *
+ * @return a {@code Map<String,String>} containing options and their argument values.
+ * The presence of a flag can be tested using {@code containsKey}, while
+ * argument values can be retrieved using {@code get(optionName)}. The
* names used for keys are the option name parameter prefixed by '--'.
- *
- *
+ *
+ *
*/
public Map<String, String> parseArguments(String[] args) {
@@ -311,18 +310,18 @@ public abstract class AbstractJob extend
}
/** Obtain input and output directories from command-line options or hadoop
- * properties. If <code>addInputOption</code> or <code>addOutputOption</code>
- * has been called, this method will throw an <code>OptionException</code> if
+ * properties. If {@code addInputOption} or {@code addOutputOption}
+ * has been called, this method will throw an {@code OptionException} if
* no source (command-line or property) for that value is present.
- * Otherwise, <code>inputPath</code> or <code>outputPath<code> will be
+ * Otherwise, {@code inputPath} or {@code outputPath} will be
* non-null only if specified as a hadoop property. Command-line options
* take precedence over hadoop properties.
- *
+ *
* @param cmdLine
* @throws IllegalArgumentException if either inputOption is present,
- * and neither <code>--input</code> nor <code>-Dmapred.input dir</code> are
- * specified or outputOption is present and neither <code>--output</code>
- * nor <code>-Dmapred.output.dir</code> are specified.
+ * and neither {@code --input} nor {@code -Dmapred.input.dir} are
+ * specified or outputOption is present and neither {@code --output}
+ * nor {@code -Dmapred.output.dir} are specified.
*/
protected void parseDirectories(CommandLine cmdLine) {
@@ -357,7 +356,7 @@ public abstract class AbstractJob extend
// nulls are ok, for cases where options are simple flags.
Object vo = cmdLine.getValue(o);
- String value = (vo == null) ? null : vo.toString();
+ String value = vo == null ? null : vo.toString();
args.put(o.getPreferredName(), value);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java Mon May 2 18:14:18 2011
@@ -19,6 +19,7 @@ package org.apache.mahout.common;
import java.io.PrintWriter;
+import org.apache.commons.cli.Options;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.util.HelpFormatter;
@@ -36,14 +37,14 @@ public final class CommandLineUtil {
}
/**
- * Print the options supported by <code>GenericOptionsParser</code>.
+ * Print the options supported by {@code GenericOptionsParser}.
* In addition to the options supported by the job, passed in as the
* group parameter.
*
* @param group job-specific command-line options.
*/
public static void printHelpWithGenericOptions(Group group) {
- org.apache.commons.cli.Options ops = new org.apache.commons.cli.Options();
+ Options ops = new Options();
new GenericOptionsParser(new Configuration(), ops, new String[0]);
org.apache.commons.cli.HelpFormatter fmt = new org.apache.commons.cli.HelpFormatter();
fmt.printHelp("<command> [Generic Options] [Job-Specific Options]",
@@ -61,7 +62,7 @@ public final class CommandLineUtil {
}
public static void printHelpWithGenericOptions(Group group, OptionException oe) {
- org.apache.commons.cli.Options ops = new org.apache.commons.cli.Options();
+ Options ops = new Options();
new GenericOptionsParser(new Configuration(), ops, new String[0]);
org.apache.commons.cli.HelpFormatter fmt = new org.apache.commons.cli.HelpFormatter();
fmt.printHelp("<command> [Generic Options] [Job-Specific Options]",
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java Mon May 2 18:14:18 2011
@@ -105,14 +105,14 @@ public final class IOUtils {
/**
* make sure to close all sources, log all of the problems occurred, clear
- * <code>closeables</code> (to prevent repeating close attempts), re-throw the
+ * {@code closeables} (to prevent repeating close attempts), re-throw the
* last one at the end. Helps resource scope management (e.g. compositions of
* {@link Closeable}s objects)
* <P>
* <p/>
* Typical pattern:
* <p/>
- *
+ *
* <pre>
* LinkedList<Closeable> closeables = new LinkedList<Closeable>();
* try {
@@ -126,13 +126,11 @@ public final class IOUtils {
* IOUtils.close(closeables);
* }
* </pre>
- *
+ *
* @param closeables
* must be a modifiable collection of {@link Closeable}s
* @throws IOException
* the last exception (if any) of all closed resources
- *
- *
*/
public static void close(Collection<? extends Closeable> closeables)
throws IOException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IntTuple.java Mon May 2 18:14:18 2011
@@ -52,10 +52,7 @@ public final class IntTuple implements W
}
/**
- * add an entry to the end of the list
- *
- * @param entry
- * @return true if the items get added
+ * Add an entry to the end of the list
*/
public void add(int entry) {
tuple.add(entry);
@@ -64,8 +61,7 @@ public final class IntTuple implements W
/**
* Fetches the string at the given location
*
- * @param index
- * @return Integer value at the given location in the tuple list
+ * @return int value at the given location in the tuple list
*/
public int at(int index) {
return tuple.get(index);
@@ -74,8 +70,6 @@ public final class IntTuple implements W
/**
* Replaces the string at the given index with the given newInteger
*
- * @param index
- * @param newInteger
* @return The previous value at that location
*/
public int replaceAt(int index, int newInteger) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/CountingIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/CountingIterator.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/CountingIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/CountingIterator.java Mon May 2 18:14:18 2011
@@ -20,7 +20,7 @@ package org.apache.mahout.common.iterato
import com.google.common.collect.AbstractIterator;
/**
- * Iterates over the integers from 0 through <code>to-1</code>.
+ * Iterates over the integers from 0 through {@code to-1}.
*/
public final class CountingIterator extends AbstractIterator<Integer> {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/DecisionForest.java Mon May 2 18:14:18 2011
@@ -153,7 +153,7 @@ public class DecisionForest implements W
DecisionForest rf = (DecisionForest) obj;
- return (trees.size() == rf.getTrees().size()) && trees.containsAll(rf.getTrees());
+ return trees.size() == rf.getTrees().size() && trees.containsAll(rf.getTrees());
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/builder/DefaultTreeBuilder.java Mon May 2 18:14:18 2011
@@ -91,7 +91,7 @@ public class DefaultTreeBuilder implemen
Split best = null;
for (int attr : attributes) {
Split split = igSplit.computeSplit(data, attr);
- if ((best == null) || (best.getIg() < split.getIg())) {
+ if (best == null || best.getIg() < split.getIg()) {
best = split;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Mon May 2 18:14:18 2011
@@ -251,7 +251,7 @@ public class Data implements Cloneable {
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof Data)) {
+ if (obj == null || !(obj instanceof Data)) {
return false;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Dataset.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Dataset.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Dataset.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Dataset.java Mon May 2 18:14:18 2011
@@ -235,7 +235,7 @@ public class Dataset implements Writable
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof Dataset)) {
+ if (obj == null || !(obj instanceof Dataset)) {
return false;
}
@@ -255,7 +255,7 @@ public class Dataset implements Writable
}
}
- return (labelId == dataset.labelId) && (nbInstances == dataset.nbInstances);
+ return labelId == dataset.labelId && nbInstances == dataset.nbInstances;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java Mon May 2 18:14:18 2011
@@ -64,13 +64,13 @@ public class Instance {
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof Instance)) {
+ if (obj == null || !(obj instanceof Instance)) {
return false;
}
Instance instance = (Instance) obj;
- return (id == instance.id) && (label == instance.label) && attrs.equals(instance.attrs);
+ return id == instance.id && label == instance.label && attrs.equals(instance.attrs);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Builder.java Mon May 2 18:14:18 2011
@@ -224,7 +224,7 @@ public abstract class Builder {
public static Path getDistributedCacheFile(Configuration conf, int index) throws IOException {
URI[] files = DistributedCache.getCacheFiles(conf);
- if ((files == null) || (files.length <= index)) {
+ if (files == null || files.length <= index) {
throw new IOException("path not found in the DistributedCache");
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java Mon May 2 18:14:18 2011
@@ -212,7 +212,7 @@ public class Classifier {
URI[] files = DistributedCache.getCacheFiles(conf);
- if ((files == null) || (files.length < 2)) {
+ if (files == null || files.length < 2) {
throw new IOException("not enough paths in the DistributedCache");
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/MapredOutput.java Mon May 2 18:14:18 2011
@@ -101,14 +101,13 @@ public class MapredOutput implements Wri
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof MapredOutput)) {
+ if (obj == null || !(obj instanceof MapredOutput)) {
return false;
}
MapredOutput mo = (MapredOutput) obj;
- return ((tree == null && mo.getTree() == null)
- || (tree != null && tree.equals(mo.getTree())))
+ return ((tree == null && mo.getTree() == null)|| (tree != null && tree.equals(mo.getTree())))
&& Arrays.equals(predictions, mo.getPredictions());
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormat.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/InMemInputFormat.java Mon May 2 18:14:18 2011
@@ -84,12 +84,12 @@ public class InMemInputFormat extends In
seed = Builder.getRandomSeed(conf);
isSingleSeed = isSingleSeed(conf);
- if ((rng != null) && (seed != null)) {
+ if (rng != null && seed != null) {
log.warn("getSplits() was called more than once and the 'seed' is set, "
+ "this can lead to no-repeatable behavior");
}
- rng = (seed == null) || isSingleSeed ? null : RandomUtils.getRandom(seed);
+ rng = seed == null || isSingleSeed ? null : RandomUtils.getRandom(seed);
int id = 0;
Added: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/inmem/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,23 @@
+/**
+ * <h2>In-memory mapreduce implementation of Random Decision Forests</h2>
+ *
+ * <p>Each mapper is responsible for growing a number of trees with a whole copy of the dataset loaded in memory,
+ * it uses the reference implementation's code to build each tree and estimate the oob error.</p>
+ *
+ * <p>The dataset is distributed to the slave nodes using the {@link org.apache.hadoop.filecache.DistributedCache}.
+ * A custom {@link org.apache.hadoop.mapreduce.InputFormat}
+ * ({@link org.apache.mahout.df.mapreduce.inmem.InMemInputFormat}) is configured with the
+ * desired number of trees and generates a number of {@link org.apache.hadoop.mapreduce.InputSplit}s
+ * equal to the configured number of maps.</p>
+ *
+ * <p>There is no need for reducers; each map outputs the trees it built and, for each tree, the labels the
+ * tree predicted for each out-of-bag instance. This step has to be done in the mapper because only there we
+ * know which instances are o-o-b.</p>
+ *
+ * <p>The Forest builder ({@link org.apache.mahout.df.mapreduce.inmem.InMemBuilder}) is responsible
+ * for configuring and launching the job.
+ * At the end of the job it parses the output files and builds the corresponding
+ * {@link org.apache.mahout.df.DecisionForest}, and for each tree prediction it calls (if available) a
+ * {@link org.apache.mahout.df.callback.PredictionCallback} that allows the caller to compute any error needed.</p>
+ */
+package org.apache.mahout.df.mapreduce.inmem;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/PartialBuilder.java Mon May 2 18:14:18 2011
@@ -124,7 +124,7 @@ public class PartialBuilder extends Buil
int numMaps = partitions.length;
// call the second step in order to complete the oob predictions
- if ((callback != null) && (numMaps > 1) && isStep2(conf)) {
+ if (callback != null && numMaps > 1 && isStep2(conf)) {
log.info("*****************************");
log.info("Second Step");
log.info("*****************************");
@@ -157,7 +157,7 @@ public class PartialBuilder extends Buil
TreeID[] keys,
Node[] trees,
PredictionCallback callback) throws IOException {
- Preconditions.checkArgument((keys == null && trees == null) || (keys != null && trees != null),
+ Preconditions.checkArgument(keys == null && trees == null || keys != null && trees != null,
"if keys is null, trees should also be null");
Preconditions.checkArgument(keys == null || keys.length == trees.length, "keys.length != trees.length");
Added: mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,16 @@
+/**
+ * <h2>Partial-data mapreduce implementation of Random Decision Forests</h2>
+ *
+ * <p>The builder splits the data, using a FileInputSplit, among the mappers.
+ * Building the forest and estimating the oob error takes two job steps.</p>
+ *
+ * <p>In the first step, each mapper is responsible for growing a number of trees with its partition's data,
+ * loading the data instances in its {@code map()} function, then building the trees in the {@code close()} method. It
+ * uses the reference implementation's code to build each tree and estimate the oob error.</p>
+ *
+ * <p>The second step is needed when estimating the oob error. Each mapper loads all the trees that do not
+ * belong to its own partition (were not built using the partition's data) and uses them to classify the
+ * partition's data instances. The data instances are loaded in the {@code map()} method and the classification
+ * is performed in the {@code close()} method.</p>
+ */
+package org.apache.mahout.df.mapreduce.partial;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/node/CategoricalNode.java Mon May 2 18:14:18 2011
@@ -86,13 +86,13 @@ public class CategoricalNode extends Nod
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof CategoricalNode)) {
+ if (obj == null || !(obj instanceof CategoricalNode)) {
return false;
}
CategoricalNode node = (CategoricalNode) obj;
- return (attr == node.attr) && Arrays.equals(values, node.values) && Arrays.equals(childs, node.childs);
+ return attr == node.attr && Arrays.equals(values, node.values) && Arrays.equals(childs, node.childs);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/node/Leaf.java Mon May 2 18:14:18 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.df.node;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.util.StringTokenizer;
import org.apache.mahout.df.data.Instance;
@@ -51,18 +50,6 @@ public class Leaf extends Node {
return 1;
}
- /**
- * Extract a Leaf Node
- *
- * @param tokenizer
- * @return the extracted Leaf Node
- */
- static Leaf parse(StringTokenizer tokenizer) {
- int label = Integer.parseInt(tokenizer.nextToken());
-
- return new Leaf(label);
- }
-
@Override
protected Type getType() {
return Type.LEAF;
@@ -73,7 +60,7 @@ public class Leaf extends Node {
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof Leaf)) {
+ if (obj == null || !(obj instanceof Leaf)) {
return false;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/node/NumericalNode.java Mon May 2 18:14:18 2011
@@ -77,14 +77,13 @@ public class NumericalNode extends Node
if (this == obj) {
return true;
}
- if ((obj == null) || !(obj instanceof NumericalNode)) {
+ if (obj == null || !(obj instanceof NumericalNode)) {
return false;
}
NumericalNode node = (NumericalNode) obj;
- return (attr == node.attr) && (split == node.split) && loChild.equals(node.loChild)
- && hiChild.equals(node.hiChild);
+ return attr == node.attr && split == node.split && loChild.equals(node.loChild) && hiChild.equals(node.hiChild);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Mon May 2 18:14:18 2011
@@ -111,7 +111,7 @@ public final class MahoutDriver {
addClass(programDriver, keyString, mainClasses.getProperty(keyString));
}
- if (args.length < 1 || args[0] == null || args[0].equals("-h") || args[0].equals("--help")) {
+ if (args.length < 1 || args[0] == null || "-h".equals(args[0]) || "--help".equals(args[0])) {
programDriver.driver(args);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/ThreadedEvolutionaryProcess.java Mon May 2 18:14:18 2011
@@ -108,7 +108,7 @@ public class ThreadedEvolutionaryProcess
int k = 0;
while (pending.size() + working.size() < threadCount) {
- State<?, ?> tmp = parents[(k++) % parentDepth];
+ State<?,?> tmp = parents[k++ % parentDepth];
pending.add(tmp.mutate());
}
} while (System.currentTimeMillis() - t0 < timeLimit);
Added: mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,26 @@
+/**
+ * <p>Provides basic evolutionary optimization using <a href="http://arxiv.org/abs/0803.3838">recorded-step</a>
+ * mutation.</p>
+ *
+ * <p>With this style of optimization, we can optimize a function {@code f: R^n -> R} by stochastic
+ * hill-climbing with some of the benefits of conjugate gradient style history encoded in the mutation function.
+ * This mutation function will adapt to allow weakly directed search rather than using the somewhat more
+ * conventional symmetric Gaussian.</p>
+ *
+ * <p>With recorded-step mutation, the meta-mutation parameters are all auto-encoded in the current state of each point.
+ * This avoids the classic problem of having more mutation rate parameters than are in the original state and then
+ * requiring even more parameters to describe the meta-mutation rate. Instead, we store the previous point and one
+ * omni-directional mutation component. Mutation is performed by first mutating along the line formed by the previous
+ * and current points and then adding a scaled symmetric Gaussian. The magnitude of the omni-directional mutation is
+ * then mutated using itself as a scale.</p>
+ *
+ * <p>Because it is convenient to not restrict the parameter space, this package also provides convenient parameter
+ * mapping methods. These mapping methods map the set of reals to a finite open interval (a,b) in such a way that
+ * {@code lim_{x->-\inf} f(x) = a} and {@code lim_{x->\inf} f(x) = b}. The linear mapping is defined so that
+ * {@code f(0) = (a+b)/2} and the exponential mapping requires that a and b are both positive and has
+ * {@code f(0) = sqrt(ab)}. The linear mapping is useful for values that must stay roughly within a range but
+ * which are roughly uniform within the center of that range. The exponential
+ * mapping is useful for values that must stay within a range but whose distribution is roughly exponential near
+ * the geometric mean of the end-points. An identity mapping is also supplied.</p>
+ */
+package org.apache.mahout.ep;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthDriver.java Mon May 2 18:14:18 2011
@@ -159,9 +159,9 @@ public final class FPGrowthDriver {
params.set("output", outputDir.toString());
String classificationMethod = (String) cmdLine.getValue(methodOpt);
- if (classificationMethod.equalsIgnoreCase("sequential")) {
+ if ("sequential".equalsIgnoreCase(classificationMethod)) {
runFPGrowth(params);
- } else if (classificationMethod.equalsIgnoreCase("mapreduce")) {
+ } else if ("mapreduce".equalsIgnoreCase(classificationMethod)) {
Configuration conf = new Configuration();
HadoopUtil.delete(conf, outputDir);
PFPGrowth.runPFPGrowth(params);
Added: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java?rev=1098706&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/package-info.java Mon May 2 18:14:18 2011
@@ -0,0 +1,49 @@
+/**
+ * <h2>MapReduce (parallel) implementation of FP Growth Algorithm for frequent Itemset Mining</h2>
+ *
+ * <p>We have a Top K Parallel FPGrowth Implementation. What it means is that given a huge transaction list,
+ * we find all unique features (field values) and eliminate those features whose frequency in the whole dataset
+ * is less than {@code minSupport}. Using these remaining features N, we find the top K closed patterns for
+ * each of them, generating NK patterns. FPGrowth Algorithm is a generic implementation, we can use any object
+ * type to denote a feature. Current implementation requires you to use a String as the object type. You may
+ * implement a version for any object by creating {@link java.util.Iterator}s, Convertors
+ * and TopKPatternWritable for that particular object. For more information please refer to the package
+ * {@code org.apache.mahout.fpm.pfpgrowth.convertors.string}.</p>
+ *
+ * {@code
+ * FPGrowth<String> fp = new FPGrowth<String>();
+ * Set<String> features = new HashSet<String>();
+ * fp.generateTopKStringFrequentPatterns(
+ * new StringRecordIterator(
+ * new FileLineIterable(new File(input), encoding, false), pattern),
+ * fp.generateFList(
+ * new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern), minSupport),
+ * minSupport,
+ * maxHeapSize,
+ * features,
+ * new StringOutputConvertor(new SequenceFileOutputCollector<Text,TopKStringPatterns>(writer)));}
+ *
+ * <ul>
+ * <li>The first argument is the iterator of transactions; in this case it is {@code Iterator<List<String>>}</li>
+ * <li>The second argument is the output of generateFList function, which returns the frequent items and
+ * their frequencies from the given database transaction iterator</li>
+ * <li>The third argument is the minimum Support of the pattern to be generated</li>
+ * <li>The fourth argument is the maximum number of patterns to be mined for each feature</li>
+ * <li>The fifth argument is the set of features for which the frequent patterns have to be mined</li>
+ * <li>The last argument is an output collector which takes [key, value] of Feature and TopK Patterns of the format
+ * {@code [String, List<Pair<List<String>,Long>>]} and writes them to the appropriate writer class
+ * which takes care of storing the object, in this case in a
+ * {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat}</li>
+ * </ul>
+ *
+ * <p>The command line launcher for string transaction data {@code org.apache.mahout.fpm.pfpgrowth.FPGrowthJob}
+ * has other features including specifying the regex pattern for splitting a string line of a transaction into
+ * the constituent features.</p>
+ *
+ * <p>The {@code numGroups} parameter in FPGrowthJob specifies the number of groups into which transactions
+ * have to be decomposed. The {@code numTreeCacheEntries} parameter specifies the number of generated
+ * conditional FP-Trees to be kept in memory so as not to regenerate them. Increasing this number
+ * increases the memory consumption but might improve speed until a certain point. This depends entirely on
+ * the dataset in question. A value of 5-10 is recommended for mining up to top 100 patterns for each feature.</p>
+ */
+package org.apache.mahout.fpm.pfpgrowth;
\ No newline at end of file
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/EvalMapper.java Mon May 2 18:14:18 2011
@@ -31,8 +31,9 @@ import com.google.common.base.Preconditi
/**
* <p>
- * Generic Mapper class for fitness evaluation. Works with the following : <code><key, candidate, key,
- * fitness></code>, where :
+ * Generic Mapper class for fitness evaluation. Works with the following :
+ * {@code <key, candidate, key, fitness>}
+ * , where :
* </p>
* key: position of the current candidate in the input file. <br>
* candidate: candidate solution to evaluate. <br>
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/MahoutEvaluator.java Mon May 2 18:14:18 2011
@@ -47,13 +47,13 @@ public final class MahoutEvaluator {
/**
* Uses Mahout to evaluate every candidate from the input population using the given evaluator.
- *
+ *
* @param evaluator
* FitnessEvaluator to use
* @param population
* input population
* @param evaluations
- * <code>List<Double></code> that contains the evaluated fitness for each candidate from the
+ * {@code List<Double>} that contains the evaluated fitness for each candidate from the
* input population, sorted in the same order as the candidates.
*/
public static void evaluate(FitnessEvaluator<?> evaluator,
@@ -81,13 +81,13 @@ public final class MahoutEvaluator {
/**
* Configure the job
- *
+ *
* @param evaluator
* FitnessEvaluator passed to the mapper
* @param inpath
- * input <code>Path</code>
+ * input {@code Path}
* @param outpath
- * output <code>Path</code>
+ * output {@code Path}
*/
private static void configureJob(Job job,
Configuration conf,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java Mon May 2 18:14:18 2011
@@ -34,17 +34,15 @@ import org.apache.mahout.common.iterator
public final class OutputUtils {
private OutputUtils() {
- // do nothing
+ // do nothing
}
/**
- * Lists all files in the output <code>Path</code>
- *
- * @param fs
- * <code>FileSystem</code> to use
- * @param outpath
- * output <code>Path</code>
- * @return <code>Path</code> array
+ * Lists all files in the output {@code Path}
+ *
+ * @param fs {@code FileSystem} to use
+ * @param outpath output {@code Path}
+ * @return {@code Path} array
*/
public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws IOException {
FileStatus[] status = fs.listStatus(outpath);
@@ -63,11 +61,9 @@ public final class OutputUtils {
/**
* Reads back the evaluations.
- *
- * @param outpath
- * output <code>Path</code>
- * @param evaluations
- * List of evaluations
+ *
+ * @param outpath output {@code Path}
+ * @param evaluations List of evaluations
*/
public static void importEvaluations(FileSystem fs,
Configuration conf,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/MultiLabelVectorWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/MultiLabelVectorWritable.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/MultiLabelVectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/MultiLabelVectorWritable.java Mon May 2 18:14:18 2011
@@ -82,7 +82,7 @@ public final class MultiLabelVectorWrita
}
public static void write(DataOutput out, SequentialAccessSparseVector ssv, int[] labels) throws IOException {
- (new MultiLabelVectorWritable(ssv, labels)).write(out);
+ new MultiLabelVectorWritable(ssv, labels).write(out);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Mon May 2 18:14:18 2011
@@ -330,7 +330,7 @@ public class RowSimilarityJob extends Ab
}
/**
- * collects all {@link DistributedRowMatrix.MatrixEntryWritable} for each column and creates a {@link VectorWritable}
+ * collects all MatrixEntryWritable for each column and creates a {@link VectorWritable}
*/
public static class EntriesToVectorsReducer
extends Reducer<SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable,IntWritable,VectorWritable> {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/GivensThinSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/GivensThinSolver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/GivensThinSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/GivensThinSolver.java Mon May 2 18:14:18 2011
@@ -488,7 +488,7 @@ public class GivensThinSolver {
double norm = ei.norm(2);
- if (Math.abs(1 - norm) < epsilon) {
+ if (Math.abs(1.0 - norm) < epsilon) {
rank++;
} else if (Math.abs(norm) > epsilon) {
return false; // not a rank deficiency, either
@@ -497,13 +497,12 @@ public class GivensThinSolver {
for (int j = 0; j <= i; j++) {
Vector ej = new DenseVector(qt[j], true);
double dot = ei.dot(ej);
- if (!(Math.abs((i == j && rank > j ? 1 : 0) - dot) < epsilon)) {
+ if (!(Math.abs((i == j && rank > j ? 1.0 : 0.0) - dot) < epsilon)) {
return false;
}
}
}
- return (!insufficientRank && rank == n) || (insufficientRank && rank < n);
-
+ return insufficientRank ? rank < n : rank == n;
}
public static boolean isOrthonormalBlocked(Iterable<double[][]> qtHats,
@@ -544,8 +543,7 @@ public class GivensThinSolver {
}
}
}
- return (!insufficientRank && rank == n) || (insufficientRank && rank < n);
-
+ return insufficientRank ? rank < n : rank == n;
}
private static final class TriangularRowView extends AbstractVector {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java Mon May 2 18:14:18 2011
@@ -24,11 +24,10 @@ import org.apache.mahout.math.Vector.Ele
/**
* simplistic implementation for Omega matrix in Stochastic SVD method
- *
*/
public class Omega {
- private static final double UNIFORM_DIVISOR = Math.pow(2d, 64);
+ private static final double UNIFORM_DIVISOR = Math.pow(2.0, 64);
private final long seed;
private final int kp;
@@ -36,22 +35,18 @@ public class Omega {
public Omega(long seed, int k, int p) {
this.seed = seed;
kp = k + p;
-
}
/**
* Get omega element at (x,y) uniformly distributed within [-1...1)
*
- * @param row
- * omega row
- * @param column
- * omega column
- * @return
+ * @param row omega row
+ * @param column omega column
*/
public double getQuick(int row, int column) {
- long hash = murmur64(row << Integer.SIZE | column, 8, seed);
+ long hash = murmur64((long) row << Integer.SIZE | column, 8, seed);
double result = hash / UNIFORM_DIVISOR;
- assert result >= -1d && result < 1d;
+ //assert result >= -1.0 && result < 1.0;
return result;
}
@@ -70,48 +65,44 @@ public class Omega {
* row of matrix Y (result) must be pre-allocated to size of (k+p)
*/
public void computeYRow(Vector aRow, double[] yRow) {
- assert yRow.length == kp;
-
- Arrays.fill(yRow, 0);
- if (!aRow.isDense()) {
- int j = 0;
- for (Element el : aRow) {
- accumDots(el.index(), el.get(), yRow);
- }
-
- } else {
+ //assert yRow.length == kp;
+ Arrays.fill(yRow, 0.0);
+ if (aRow.isDense()) {
int n = aRow.size();
for (int j = 0; j < n; j++) {
accumDots(j, aRow.getQuick(j), yRow);
}
+ } else {
+ for (Element el : aRow) {
+ accumDots(el.index(), el.get(), yRow);
+ }
}
}
/**
- * Shortened version for data < 8 bytes packed into <code>len</code> lowest
- * bytes of <code>val</code>.
- * <P>
- *
+ * Shortened version for data < 8 bytes packed into {@code len} lowest
+ * bytes of {@code val}.
+ *
* @param val
* the value
* @param len
* the length of data packed into this many low bytes of
- * <code>val</code>
+ * {@code val}
* @param seed
* the seed to use
* @return murmur hash
*/
public static long murmur64(long val, int len, long seed) {
- assert len > 0 && len <= 8;
+ //assert len > 0 && len <= 8;
long m = 0xc6a4a7935bd1e995L;
- int r = 47;
- long h = seed ^ (len * m);
+ long h = seed ^ len * m;
long k = val;
k *= m;
+ int r = 47;
k ^= k >>> r;
k *= m;
@@ -148,9 +139,10 @@ public class Omega {
if (offset < len) {
long k = 0;
- for (; offset < len; offset++) {
+ while (offset < len) {
k <<= 8;
k |= val[offset] & 0xff;
+ offset++;
}
h ^= k;
h *= m;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototype.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototype.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototype.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototype.java Mon May 2 18:14:18 2011
@@ -51,7 +51,7 @@ public class SSVDPrototype {
public SSVDPrototype(long seed, int kp, int r) {
this.kp = kp;
- omega = new Omega(seed, kp / 2, kp - (kp / 2));
+ omega = new Omega(seed, kp / 2, kp - kp / 2);
yRow = new double[kp];
// m_yRowV = new DenseVector(m_yRow,true);
this.r = r;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java Mon May 2 18:14:18 2011
@@ -115,7 +115,7 @@ public class SSVDSolver {
* create new SSVD solver. Required parameters are passed to constructor to
* ensure they are set. Optional parameters can be set using setters .
* <P>
- *
+ *
* @param conf
* hadoop configuration
* @param inputPath
@@ -125,7 +125,7 @@ public class SSVDSolver {
* Output path containing U, V and singular values vector files.
* @param ablockRows
* The vertical hight of a q-block (bigger value require more memory
- * in mappers+ perhaps larger <code>minSplitSize</code> values
+ * in mappers+ perhaps larger {@code minSplitSize} values
* @param k
* desired rank
* @param p
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocMapper.java Mon May 2 18:14:18 2011
@@ -99,8 +99,8 @@ public class CollocMapper extends Mapper
OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());
do {
- String term = (sf.getAttribute(TermAttribute.class)).term();
- String type = (sf.getAttribute(TypeAttribute.class)).type();
+ String term = sf.getAttribute(TermAttribute.class).term();
+ String type = sf.getAttribute(TypeAttribute.class).type();
if ("shingle".equals(type)) {
count++;
ngrams.adjustOrPutValue(term, 1, 1);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/Gram.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/Gram.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/Gram.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/Gram.java Mon May 2 18:14:18 2011
@@ -181,9 +181,9 @@ public class Gram extends BinaryComparab
/* Cribbed from o.a.hadoop.io.Text:
* Sets the capacity of this object to <em>at least</em>
- * <code>len</code> bytes. If the current buffer is longer,
+ * {@code len} bytes. If the current buffer is longer,
* then the capacity and existing content of the buffer are
- * unchanged. If <code>len</code> is larger
+ * unchanged. If {@code len} is larger
* than the current capacity, this object's capacity is
* increased to match.
* @param len the number of bytes we need
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/GramKey.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/GramKey.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/GramKey.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/GramKey.java Mon May 2 18:14:18 2011
@@ -93,9 +93,9 @@ public final class GramKey extends Binar
/* Cribbed from o.a.hadoop.io.Text:
* Sets the capacity of this object to <em>at least</em>
- * <code>len</code> bytes. If the current buffer is longer,
+ * {@code len} bytes. If the current buffer is longer,
* then the capacity and existing content of the buffer are
- * unchanged. If <code>len</code> is larger
+ * unchanged. If {@code len} is larger
* than the current capacity, this object's capacity is
* increased to match.
* @param len the number of bytes we need
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/TextValueEncoder.java Mon May 2 18:14:18 2011
@@ -81,8 +81,6 @@ public class TextValueEncoder extends Fe
/**
* Adds all of the tokens that we counted up to a vector.
- * @param weight The weight
- * @param data
*/
public void flush(double weight, Vector data) {
for (String word : counts.elementSet()) {
@@ -110,8 +108,6 @@ public class TextValueEncoder extends Fe
* Tokenizes a string using the simplest method. This should be over-ridden for more subtle
* tokenization.
* @see LuceneTextValueEncoder
- * @param originalForm
- * @return
*/
protected Iterable<String> tokenize(CharSequence originalForm) {
return onNonWord.split(originalForm);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java Mon May 2 18:14:18 2011
@@ -74,7 +74,7 @@ public class TFPartialVectorReducer exte
ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxNGramSize);
do {
- String term = (sf.getAttribute(TermAttribute.class)).term();
+ String term = sf.getAttribute(TermAttribute.class).term();
if (term.length() > 0 && dictionary.containsKey(term)) { // ngram
int termId = dictionary.get(term);
vector.setQuick(termId, vector.getQuick(termId) + 1);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java Mon May 2 18:14:18 2011
@@ -32,7 +32,7 @@ import org.apache.mahout.common.Pair;
/**
* <p>An {@link java.util.Iterator} which iterates over any of the KDD Cup's rating files. These include the files
* {train,test,validation}Idx{1,2}}.txt. See http://kddcup.yahoo.com/. Each element in the iteration corresponds
- * to one user's ratings as a {@link PreferenceArray} and corresponding timestamps as a parallel {@link long[]}
+ * to one user's ratings as a {@link PreferenceArray} and corresponding timestamps as a parallel {@code long}
* array.</p>
*
* <p>Timestamps in the data set are relative to some unknown point in time, for anonymity. They are assumed
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java Mon May 2 18:14:18 2011
@@ -32,10 +32,10 @@ import org.apache.mahout.common.Pair;
/**
* <p>This class converts a KDD Cup input file into a compressed CSV format. The output format is
- * <code>userID,itemID,score,timestamp</code>. It can optionally restrict its output to exclude
+ * {@code userID,itemID,score,timestamp}. It can optionally restrict its output to exclude
* score and/or timestamp.</p>
*
- * <p>Run as: <code>ToCSV (input file) (output file) [num columns to output]</p>
+ * <p>Run as: {@code ToCSV (input file) (output file) [num columns to output]}</p>
*/
public final class ToCSV {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java?rev=1098706&r1=1098705&r2=1098706&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Runner.java Mon May 2 18:14:18 2011
@@ -39,7 +39,7 @@ import org.slf4j.LoggerFactory;
* <p>Runs "track 1" of the KDD Cup competition using whatever recommender is inside {@link Track1Recommender}
* and attempts to output the result in the correct contest format.</p>
*
- * <p>Run as: <code>Track1Runner [track 1 data file directory] [output file]</code></p>
+ * <p>Run as: {@code Track1Runner [track 1 data file directory] [output file]}</p>
*/
public final class Track1Runner {