You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2013/06/22 16:20:37 UTC
svn commit: r1495738 - in /mahout/trunk/core/src:
main/java/org/apache/mahout/cf/taste/hadoop/
main/java/org/apache/mahout/cf/taste/hadoop/als/
main/java/org/apache/mahout/cf/taste/hadoop/preparation/
main/java/org/apache/mahout/cf/taste/impl/recommend...
Author: srowen
Date: Sat Jun 22 14:20:36 2013
New Revision: 1495738
URL: http://svn.apache.org/r1495738
Log:
More from IntelliJ inspection
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BiasedItemBasedRecommender.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/UpdatableSearcher.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJobTest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Sat Jun 22 14:20:36 2013
@@ -58,9 +58,7 @@ public final class TasteHadoopUtils {
}
public static int readID(String token, boolean usesLongIDs) {
- return usesLongIDs
- ? TasteHadoopUtils.idToIndex(Long.parseLong(token))
- : Integer.parseInt(token);
+ return usesLongIDs ? idToIndex(Long.parseLong(token)) : Integer.parseInt(token);
}
/**
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALS.java Sat Jun 22 14:20:36 2013
@@ -78,11 +78,11 @@ final class ALS {
Path[] cachedFiles = HadoopUtil.getCachedFiles(conf);
LocalFileSystem localFs = FileSystem.getLocal(conf);
- for (int n = 0; n < cachedFiles.length; n++) {
+ for (Path cachedFile : cachedFiles) {
SequenceFile.Reader reader = null;
try {
- reader = new SequenceFile.Reader(localFs, cachedFiles[n], conf);
+ reader = new SequenceFile.Reader(localFs, cachedFile, conf);
while (reader.next(rowIndex, row)) {
featureMatrix.put(rowIndex.get(), row.get());
}
@@ -96,7 +96,7 @@ final class ALS {
}
public static OpenIntObjectHashMap<Vector> readMatrixByRows(Path dir, Configuration conf) {
- OpenIntObjectHashMap matrix = new OpenIntObjectHashMap<Vector>();
+ OpenIntObjectHashMap<Vector> matrix = new OpenIntObjectHashMap<Vector>();
for (Pair<IntWritable,VectorWritable> pair
: new SequenceFileDirIterable<IntWritable,VectorWritable>(dir, PathType.LIST, PathFilters.partFilter(), conf)) {
int rowIndex = pair.getFirst().get();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Sat Jun 22 14:20:36 2013
@@ -388,8 +388,8 @@ public class ParallelALSFactorizationJob
static class MapLongIDsMapper extends Mapper<LongWritable,Text,VarIntWritable,VarLongWritable> {
private int tokenPos;
- private VarIntWritable index = new VarIntWritable();
- private VarLongWritable idWritable = new VarLongWritable();
+ private final VarIntWritable index = new VarIntWritable();
+ private final VarLongWritable idWritable = new VarLongWritable();
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionMapper.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionMapper.java Sat Jun 22 14:20:36 2013
@@ -89,7 +89,7 @@ public class PredictionMapper extends Sh
OpenIntObjectHashMap<Vector> M = uAndM.getSecond();
Vector ratings = ratingsWritable.get();
- final int userIndex = userIndexWritable.get();
+ int userIndex = userIndexWritable.get();
final OpenIntHashSet alreadyRatedItems = new OpenIntHashSet(ratings.getNumNondefaultElements());
for (Vector.Element e : ratings.nonZeroes()) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java Sat Jun 22 14:20:36 2013
@@ -57,10 +57,10 @@ public class ToItemVectorsMapper
int numElementsAfterSampling = userRatings.getNumNondefaultElements();
int column = TasteHadoopUtils.idToIndex(rowIndex.get());
- Vector itemVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
itemVectorWritable.setWritesLaxPrecision(true);
+ Vector itemVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
for (Vector.Element elem : userRatings.nonZeroes()) {
itemID.set(elem.index());
itemVector.setQuick(column, elem.get());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BiasedItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BiasedItemBasedRecommender.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BiasedItemBasedRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BiasedItemBasedRecommender.java Sat Jun 22 14:20:36 2013
@@ -143,7 +143,7 @@ public class BiasedItemBasedRecommender
float[] ratings = new float[userIDs.length];
long[] itemIDs = new long[userIDs.length];
- final double[] similarities = similarity.itemSimilarities(itemID, userIDs);
+ double[] similarities = similarity.itemSimilarities(itemID, userIDs);
for (int n = 0; n < preferencesFromUser.length(); n++) {
ratings[n] = preferencesFromUser.get(n).getValue();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java Sat Jun 22 14:20:36 2013
@@ -119,7 +119,6 @@ public class SamplingCandidateItemsStrat
@Override
protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) throws TasteException {
- FastIDSet possibleItemsIDs = new FastIDSet();
LongPrimitiveIterator preferredItemIDsIterator = new LongPrimitiveArrayIterator(preferredItemIDs);
if (preferredItemIDs.length > maxItems) {
double samplingRate = (double) maxItems / preferredItemIDs.length;
@@ -127,6 +126,7 @@ public class SamplingCandidateItemsStrat
preferredItemIDsIterator =
new SamplingLongPrimitiveIterator(preferredItemIDsIterator, samplingRate);
}
+ FastIDSet possibleItemsIDs = new FastIDSet();
while (preferredItemIDsIterator.hasNext()) {
long itemID = preferredItemIDsIterator.nextLong();
PreferenceArray prefs = dataModel.getPreferencesForItem(itemID);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java Sat Jun 22 14:20:36 2013
@@ -213,12 +213,12 @@ public final class ImplicitLinearRegress
* @param recomputeUserFeatures
*/
public void reCalculateTrans(boolean recomputeUserFeatures) {
- if (!recomputeUserFeatures) {
- Matrix uMatrix = new DenseMatrix(userMatrix);
- userTransUser = uMatrix.transpose().times(uMatrix);
- } else {
+ if (recomputeUserFeatures) {
Matrix iMatrix = new DenseMatrix(itemMatrix);
itemTransItem = iMatrix.transpose().times(iMatrix);
+ } else {
+ Matrix uMatrix = new DenseMatrix(userMatrix);
+ userTransUser = uMatrix.transpose().times(uMatrix);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AveragingPreferenceInferrer.java Sat Jun 22 14:20:36 2013
@@ -64,12 +64,12 @@ public final class AveragingPreferenceIn
@Override
public Float get(Long key) throws TasteException {
- RunningAverage average = new FullRunningAverage();
PreferenceArray prefs = dataModel.getPreferencesFromUser(key);
int size = prefs.length();
if (size == 0) {
return ZERO;
}
+ RunningAverage average = new FullRunningAverage();
for (int i = 0; i < size; i++) {
average.addDatum(prefs.getValue(i));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Sat Jun 22 14:20:36 2013
@@ -128,8 +128,8 @@ public class ConfusionMatrix {
br += confusionMatrix[i][j];
}
double bc = 0;
- for (int j = 0; j < confusionMatrix.length; j++) {
- bc += confusionMatrix[j][i];
+ for (int[] vec : confusionMatrix) {
+ bc += vec[i];
}
b += br * bc;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/Dataset.java Sat Jun 22 14:20:36 2013
@@ -213,6 +213,7 @@ public class Dataset {
return values[labelId][(int) code];
}
+ @Override
public String toString() {
return "attributes=" + Arrays.toString(attributes);
}
@@ -373,7 +374,6 @@ public class Dataset {
*/
public static Dataset fromJSON(String json) {
- Dataset dataset = new Dataset();
List<Map<String, Object>> fromJSON;
try {
fromJSON = OBJECT_MAPPER.readValue(json, new TypeReference<List<Map<String, Object>>>() {});
@@ -383,6 +383,7 @@ public class Dataset {
List<Attribute> attributes = Lists.newLinkedList();
List<Integer> ignored = Lists.newLinkedList();
String[][] nominalValues = new String[fromJSON.size()][];
+ Dataset dataset = new Dataset();
for (int i = 0; i < fromJSON.size(); i++) {
Map<String, Object> attribute = fromJSON.get(i);
if (Attribute.fromString((String) attribute.get(TYPE)) == Attribute.IGNORED) {
@@ -394,13 +395,13 @@ public class Dataset {
dataset.labelId = i - ignored.size();
}
if (attribute.get(VALUES) != null) {
- List get = (List) attribute.get(VALUES);
- String[] array = (String[]) get.toArray(new String[]{});
+ List<String> get = (List<String>) attribute.get(VALUES);
+ String[] array = get.toArray(new String[get.size()]);
nominalValues[i] = array;
}
}
}
- dataset.attributes = attributes.toArray(new Attribute[]{});
+ dataset.attributes = attributes.toArray(new Attribute[attributes.size()]);
dataset.ignored = new int[ignored.size()];
dataset.values = nominalValues;
for (int i = 0; i < dataset.ignored.length; i++) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java Sat Jun 22 14:20:36 2013
@@ -107,7 +107,6 @@ public class RegressionSplit extends IgS
*/
private static Split numericalSplit(Data data, int attr) {
FullRunningAverage[] ra = new FullRunningAverage[2];
- double[] sk = new double[2];
for (int i = 0; i < ra.length; i++) {
ra[i] = new FullRunningAverage();
}
@@ -119,6 +118,7 @@ public class RegressionSplit extends IgS
}
Arrays.sort(instances, new InstanceComparator(attr));
+ double[] sk = new double[2];
for (Instance instance : instances) {
double xk = data.getDataset().getLabel(instance);
if (ra[1].getCount() == 0) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java Sat Jun 22 14:20:36 2013
@@ -114,12 +114,12 @@ public final class UDistrib {
}
private static void runTool(String dataStr, String datasetStr, String output, int numPartitions) throws IOException {
- Configuration conf = new Configuration();
Preconditions.checkArgument(numPartitions > 0, "numPartitions <= 0");
// make sure the output file does not exist
Path outputPath = new Path(output);
+ Configuration conf = new Configuration();
FileSystem fs = outputPath.getFileSystem(conf);
Preconditions.checkArgument(!fs.exists(outputPath), "Output path already exists");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Sat Jun 22 14:20:36 2013
@@ -283,8 +283,8 @@ public class CanopyDriver extends Abstra
Path path = new Path(canopyOutputDir, "part-r-00000");
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
Text.class, ClusterWritable.class);
- ClusterWritable clusterWritable = new ClusterWritable();
try {
+ ClusterWritable clusterWritable = new ClusterWritable();
for (Canopy canopy : canopies) {
canopy.computeParameters();
if (log.isDebugEnabled()) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyReducer.java Sat Jun 22 14:20:36 2013
@@ -47,9 +47,9 @@ public class CanopyReducer extends Reduc
canopyClusterer.addPointToCanopies(point, canopies);
}
for (Canopy canopy : canopies) {
- ClusterWritable clusterWritable = new ClusterWritable();
canopy.computeParameters();
if (canopy.getNumObservations() > clusterFilter) {
+ ClusterWritable clusterWritable = new ClusterWritable();
clusterWritable.setValue(canopy);
context.write(new Text(canopy.getIdentifier()), clusterWritable);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/minhash/MinHashMapper.java Sat Jun 22 14:20:36 2013
@@ -39,8 +39,8 @@ public class MinHashMapper extends Mappe
private byte[] bytesToHash;
private boolean hashValue;
- private Text cluster = new Text();
- private VectorWritable vector = new VectorWritable();
+ private final Text cluster = new Text();
+ private final VectorWritable vector = new VectorWritable();
@Override
protected void setup(Context context) throws IOException, InterruptedException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Sat Jun 22 14:20:36 2013
@@ -200,10 +200,10 @@ public class EigencutsDriver extends Abs
* @return
*/
private static double median(Vector v) {
- OnlineSummarizer med = new OnlineSummarizer();
if (v.size() < 100) {
return v.zSum() / v.size();
}
+ OnlineSummarizer med = new OnlineSummarizer();
for (Vector.Element e : v.all()) {
med.add(e.get());
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/BallKMeans.java Sat Jun 22 14:20:36 2013
@@ -245,12 +245,12 @@ public class BallKMeans implements Itera
* @param datapoints The datapoints to select from. These datapoints should be WeightedVectors of some kind.
*/
private void initializeSeedsRandomly(List<? extends WeightedVector> datapoints) {
- Multinomial<Integer> seedSelector = new Multinomial<Integer>();
int numDatapoints = datapoints.size();
double totalWeight = 0;
for (WeightedVector datapoint : datapoints) {
totalWeight += datapoint.getWeight();
}
+ Multinomial<Integer> seedSelector = new Multinomial<Integer>();
for (int i = 0; i < numDatapoints; ++i) {
seedSelector.add(i, datapoints.get(i).getWeight() / totalWeight);
}
@@ -330,7 +330,7 @@ public class BallKMeans implements Itera
// set to the squared distance from c_1
for (int i = 0; i < datapoints.size(); ++i) {
WeightedVector row = datapoints.get(i);
- final double w = distanceMeasure.distance(c_1, row) * 2 * Math.log(1 + row.getWeight());
+ double w = distanceMeasure.distance(c_1, row) * 2 * Math.log(1 + row.getWeight());
seedSelector.set(i, w);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/cluster/StreamingKMeans.java Sat Jun 22 14:20:36 2013
@@ -128,7 +128,7 @@ public class StreamingKMeans implements
/**
* Random object to sample values from.
*/
- private Random random = RandomUtils.getRandom();
+ private final Random random = RandomUtils.getRandom();
/**
* Calls StreamingKMeans(searcher, numClusters, 1.3, 10, 2).
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansReducer.java Sat Jun 22 14:20:36 2013
@@ -71,7 +71,7 @@ public class StreamingKMeansReducer exte
}
}
- public List<Centroid> centroidWritablesToList(Iterable<CentroidWritable> centroids) {
+ public static List<Centroid> centroidWritablesToList(Iterable<CentroidWritable> centroids) {
// A new list must be created because Hadoop iterators mutate the contents of the Writable in
// place, without allocating new references when iterating through the centroids Iterable.
return Lists.newArrayList(Iterables.transform(centroids, new Function<CentroidWritable, Centroid>() {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansThread.java Sat Jun 22 14:20:36 2013
@@ -34,8 +34,8 @@ import org.apache.mahout.math.neighborho
public class StreamingKMeansThread implements Callable<Iterable<Centroid>> {
private static final int NUM_ESTIMATE_POINTS = 1000;
- private Configuration conf;
- private Iterable<Centroid> datapoints;
+ private final Configuration conf;
+ private final Iterable<Centroid> datapoints;
public StreamingKMeansThread(Path input, Configuration conf) {
this(StreamingKMeansUtilsMR.getCentroidsFromVectorWritable(
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/streaming/mapreduce/StreamingKMeansUtilsMR.java Sat Jun 22 14:20:36 2013
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.clustering.streaming.mapreduce;
import java.io.IOException;
@@ -24,7 +41,10 @@ import org.apache.mahout.math.neighborho
import org.apache.mahout.math.neighborhood.ProjectionSearch;
import org.apache.mahout.math.neighborhood.UpdatableSearcher;
-public class StreamingKMeansUtilsMR {
+public final class StreamingKMeansUtilsMR {
+
+ private StreamingKMeansUtilsMR() {
+ }
/**
* Instantiates a searcher from a given configuration.
@@ -37,7 +57,7 @@ public class StreamingKMeansUtilsMR {
DistanceMeasure distanceMeasure;
String distanceMeasureClass = conf.get(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
try {
- distanceMeasure = (DistanceMeasure)Class.forName(distanceMeasureClass).newInstance();
+ distanceMeasure = (DistanceMeasure) Class.forName(distanceMeasureClass).getConstructor().newInstance();
} catch (Exception e) {
throw new RuntimeException("Failed to instantiate distanceMeasure", e);
}
@@ -72,8 +92,7 @@ public class StreamingKMeansUtilsMR {
*/
public static Iterable<Centroid> getCentroidsFromVectorWritable(Iterable<VectorWritable> inputIterable) {
return Iterables.transform(inputIterable, new Function<VectorWritable, Centroid>() {
- int numVectors = 0;
-
+ private int numVectors = 0;
@Override
public Centroid apply(VectorWritable input) {
Preconditions.checkNotNull(input);
@@ -90,7 +109,7 @@ public class StreamingKMeansUtilsMR {
* @param input Iterable of Vectors to cast
* @return the new Centroids
*/
- public static Iterable<Centroid> castVectorsToCentroids(final Iterable<Vector> input) {
+ public static Iterable<Centroid> castVectorsToCentroids(Iterable<Vector> input) {
return Iterables.transform(input, new Function<Vector, Centroid>() {
private int numVectors = 0;
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterCountReader.java Sat Jun 22 14:20:36 2013
@@ -85,7 +85,8 @@ public final class ClusterCountReader {
conf);
int i = 0;
while (it.hasNext()) {
- Integer key, value;
+ Integer key;
+ Integer value;
if (keyIsClusterId) { // key is the cluster id, value is i, the index we will use
key = it.next().getValue().getId();
value = i;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Sat Jun 22 14:20:36 2013
@@ -19,7 +19,6 @@ package org.apache.mahout.common;
import java.io.File;
import java.io.IOException;
-import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@@ -114,7 +113,7 @@ public abstract class AbstractJob extend
private Group group;
protected AbstractJob() {
- options = Lists.newLinkedList();;
+ options = Lists.newLinkedList();
}
/** Returns the input path established by a call to {@link #parseArguments(String[])}.
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java Sat Jun 22 14:20:36 2013
@@ -93,8 +93,6 @@ public final class MahoutDriver {
public static void main(String[] args) throws Throwable {
- ProgramDriver programDriver = new ProgramDriver();
-
Properties mainClasses = loadProperties("driver.classes.props");
if (mainClasses == null) {
mainClasses = loadProperties("driver.classes.default.props");
@@ -104,6 +102,7 @@ public final class MahoutDriver {
}
boolean foundShortName = false;
+ ProgramDriver programDriver = new ProgramDriver();
for (Object key : mainClasses.keySet()) {
String keyString = (String) key;
if (args.length > 0 && shortName(mainClasses.getProperty(keyString)).equals(args[0])) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/ep/EvolutionaryProcess.java Sat Jun 22 14:20:36 2013
@@ -215,7 +215,7 @@ public class EvolutionaryProcess<T exten
int n = input.readInt();
population = Lists.newArrayList();
for (int i = 0; i < n; i++) {
- State<T, U> state = PolymorphicWritable.read(input, State.class);
+ State<T, U> state = (State<T, U>) PolymorphicWritable.read(input, State.class);
population.add(state);
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java Sat Jun 22 14:20:36 2013
@@ -164,12 +164,12 @@ public final class PFPGrowth {
public static IntArrayList getGroupMembers(int groupId,
int maxPerGroup,
int numFeatures) {
- IntArrayList ret = new IntArrayList();
int start = groupId * maxPerGroup;
int end = start + maxPerGroup;
if (end > numFeatures) {
end = numFeatures;
}
+ IntArrayList ret = new IntArrayList();
for (int i = start; i < end; i++) {
ret.add(i);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java Sat Jun 22 14:20:36 2013
@@ -19,7 +19,6 @@ package org.apache.mahout.fpm.pfpgrowth;
import java.io.IOException;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionTreeIterator.java Sat Jun 22 14:20:36 2013
@@ -68,9 +68,9 @@ final class TransactionTreeIterator exte
}
} while (sum == transactionTree.count(childId));
- IntArrayList data = new IntArrayList();
Iterator<int[]> it = depth.iterator();
it.next();
+ IntArrayList data = new IntArrayList();
while (it.hasNext()) {
data.add(transactionTree.attribute(it.next()[0]));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java Sat Jun 22 14:20:36 2013
@@ -190,8 +190,8 @@ public final class MatrixColumnMeansJob
private static final IntWritable ONE = new IntWritable(1);
private String vectorClass;
- Vector outputVector;
- VectorWritable outputVectorWritable = new VectorWritable();
+ private Vector outputVector;
+ private final VectorWritable outputVectorWritable = new VectorWritable();
@Override
public void setup(Context context) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtDenseOutJob.java Sat Jun 22 14:20:36 2013
@@ -24,7 +24,6 @@ import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.Iterator;
-import java.util.LinkedList;
import java.util.regex.Matcher;
import com.google.common.collect.Lists;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java Sat Jun 22 14:20:36 2013
@@ -23,7 +23,6 @@ import java.text.NumberFormat;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
-import java.util.LinkedList;
import java.util.regex.Matcher;
import com.google.common.collect.Lists;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java Sat Jun 22 14:20:36 2013
@@ -20,7 +20,6 @@ package org.apache.mahout.math.hadoop.st
import java.io.Closeable;
import java.io.IOException;
import java.util.Deque;
-import java.util.LinkedList;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java Sat Jun 22 14:20:36 2013
@@ -62,9 +62,9 @@ public class VJob {
/*
* xi and s_q are PCA-related corrections, per MAHOUT-817
*/
- Vector xi;
- Vector sq;
- PlusMult plusMult = new PlusMult(0);
+ private Vector xi;
+ private Vector sq;
+ private final PlusMult plusMult = new PlusMult(0);
@Override
protected void map(IntWritable key, VectorWritable value, Context context)
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/BruteSearch.java Sat Jun 22 14:20:36 2013
@@ -40,7 +40,7 @@ public class BruteSearch extends Updatab
/**
* The list of reference vectors.
*/
- private List<Vector> referenceVectors;
+ private final List<Vector> referenceVectors;
public BruteSearch(DistanceMeasure distanceMeasure) {
super(distanceMeasure);
@@ -66,6 +66,7 @@ public class BruteSearch extends Updatab
* @param limit The number of results to return; must be at least 1.
* @return A list of the closest @limit neighbors for the given query.
*/
+ @Override
public List<WeightedThing<Vector>> search(Vector query, int limit) {
Preconditions.checkArgument(limit > 0);
limit = Math.min(limit, referenceVectors.size());
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java Sat Jun 22 14:20:36 2013
@@ -291,7 +291,7 @@ public class FastProjectionSearch extend
public Iterator<Vector> iterator() {
reindex(true);
return new AbstractIterator<Vector>() {
- Iterator<WeightedThing<Vector>> data = scalarProjections.get(0).iterator();
+ private final Iterator<WeightedThing<Vector>> data = scalarProjections.get(0).iterator();
@Override
protected Vector computeNext() {
do {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java Sat Jun 22 14:20:36 2013
@@ -34,7 +34,7 @@ public class HashedVector extends Weight
/**
* Value of the locality sensitive hash. It is 64 bit.
*/
- private long hash;
+ private final long hash;
public HashedVector(Vector vector, long hash, int index) {
super(vector, 1, index);
@@ -55,7 +55,7 @@ public class HashedVector extends Weight
long hash = 0;
for (Element element : projection.times(vector).nonZeroes()) {
if (element.get() > 0) {
- hash += 1 << element.index();
+ hash += 1L << element.index();
}
}
return hash;
@@ -89,10 +89,9 @@ public class HashedVector extends Weight
}
if (!(o instanceof HashedVector)) {
return o instanceof Vector && this.minus((Vector) o).norm(1) == 0;
- } else {
- HashedVector v = (HashedVector) o;
- return v.hash == this.hash && this.minus(v).norm(1) == 0;
}
+ HashedVector v = (HashedVector) o;
+ return v.hash == this.hash && this.minus(v).norm(1) == 0;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/LocalitySensitiveHashSearch.java Sat Jun 22 14:20:36 2013
@@ -24,7 +24,7 @@ import org.apache.mahout.math.stats.Onli
* is that it does an adaptive cutoff for the cutoff on the bitwise distance. Making this
* cutoff adaptive means that we only need to make a single pass through the data.
*/
-public class LocalitySensitiveHashSearch extends UpdatableSearcher implements Iterable<Vector> {
+public class LocalitySensitiveHashSearch extends UpdatableSearcher {
/**
* Number of bits in the locality sensitive hash. 64 bits fit neatly into a long.
*/
@@ -51,7 +51,7 @@ public class LocalitySensitiveHashSearch
*/
private static final int MIN_DISTRIBUTION_COUNT = 10;
- private Multiset<HashedVector> trainingVectors = HashMultiset.create();
+ private final Multiset<HashedVector> trainingVectors = HashMultiset.create();
/**
* This matrix of BITS random vectors is used to compute the Locality Sensitive Hash
@@ -105,10 +105,6 @@ public class LocalitySensitiveHashSearch
// We keep an approximation of the closest vectors here.
PriorityQueue<WeightedThing<Vector>> top = Searcher.getCandidateQueue(getSearchSize());
- // We keep the counts of the hash distances here. This lets us accurately
- // judge what hash distance cutoff we should use.
- int[] hashCounts = new int[BITS + 1];
-
// We scan the vectors using bit counts as an approximation of the dot product so we can do as few
// full distance computations as possible. Our goal is to only do full distance computations for
// vectors with hash distance at most as large as the searchSize biggest hash distance seen so far.
@@ -118,12 +114,17 @@ public class LocalitySensitiveHashSearch
distribution[i] = new OnlineSummarizer();
}
+ distanceEvaluations = 0;
+
+ // We keep the counts of the hash distances here. This lets us accurately
+ // judge what hash distance cutoff we should use.
+ int[] hashCounts = new int[BITS + 1];
+
// Maximum number of different bits to still consider a vector a candidate for nearest neighbor.
// Starts at the maximum number of bits, but decreases and can increase.
int hashLimit = BITS;
int limitCount = 0;
double distanceLimit = Double.POSITIVE_INFINITY;
- distanceEvaluations = 0;
// In this loop, we have the invariants that:
//
@@ -213,7 +214,7 @@ public class LocalitySensitiveHashSearch
return removeHash(best);
}
- protected WeightedThing<Vector> removeHash(WeightedThing<Vector> input) {
+ protected static WeightedThing<Vector> removeHash(WeightedThing<Vector> input) {
return new WeightedThing<Vector>(((HashedVector) input.getValue()).getVector(), input.getWeight());
}
@@ -223,6 +224,7 @@ public class LocalitySensitiveHashSearch
trainingVectors.add(new HashedVector(vector, projection, HashedVector.INVALID_INDEX, BIT_MASK));
}
+ @Override
public int size() {
return trainingVectors.size();
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java Sat Jun 22 14:20:36 2013
@@ -38,7 +38,7 @@ import org.apache.mahout.math.random.Wei
/**
* Does approximate nearest neighbor search by projecting the data.
*/
-public class ProjectionSearch extends UpdatableSearcher implements Iterable<Vector> {
+public class ProjectionSearch extends UpdatableSearcher {
/**
* A list of tree sets containing the scalar projections of each vector.
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java Sat Jun 22 14:20:36 2013
@@ -38,7 +38,7 @@ import org.apache.mahout.math.random.Wei
public abstract class Searcher implements Iterable<Vector> {
protected DistanceMeasure distanceMeasure;
- public Searcher(DistanceMeasure distanceMeasure) {
+ protected Searcher(DistanceMeasure distanceMeasure) {
this.distanceMeasure = distanceMeasure;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/UpdatableSearcher.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/UpdatableSearcher.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/UpdatableSearcher.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/UpdatableSearcher.java Sat Jun 22 14:20:36 2013
@@ -25,7 +25,7 @@ import org.apache.mahout.math.Vector;
*/
public abstract class UpdatableSearcher extends Searcher {
- public UpdatableSearcher(DistanceMeasure distanceMeasure) {
+ protected UpdatableSearcher(DistanceMeasure distanceMeasure) {
super(distanceMeasure);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java Sat Jun 22 14:20:36 2013
@@ -172,9 +172,10 @@ public final class DictionaryVectorizer
Path dictionaryJobPath = new Path(output, DICTIONARY_JOB_FOLDER);
+ log.info("Creating dictionary from {} and saving at {}", input, dictionaryJobPath);
+
int[] maxTermDimension = new int[1];
List<Path> dictionaryChunks;
- log.info("Creating dictionary from {} and saving at {}", input, dictionaryJobPath);
if (maxNGramSize == 1) {
startWordCounting(input, dictionaryJobPath, baseConf, minSupport);
dictionaryChunks =
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/TFIDF.java Sat Jun 22 14:20:36 2013
@@ -23,15 +23,6 @@ public class TFIDF implements Weight {
private final DefaultSimilarity sim = new DefaultSimilarity();
- public TFIDF() {
- }
-
-/* public TFIDF(Similarity sim) {
- this.sim = sim;
- }
- */
-
-
@Override
public double calculate(int tf, int df, int length, int numDocs) {
// ignore length
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/document/SequenceFileTokenizerMapper.java Sat Jun 22 14:20:36 2013
@@ -44,8 +44,8 @@ public class SequenceFileTokenizerMapper
TokenStream stream = analyzer.tokenStream(key.toString(), new StringReader(value.toString()));
stream.reset();
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
- StringTuple document = new StringTuple();
stream.reset();
+ StringTuple document = new StringTuple();
while (stream.incrementToken()) {
if (termAtt.length() > 0) {
document.add(new String(termAtt.buffer(), 0, termAtt.length()));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java Sat Jun 22 14:20:36 2013
@@ -70,7 +70,7 @@ public class WordsPrunerReducer extends
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Configuration conf = context.getConfiguration();
- Path[] localFiles = HadoopUtil.getCachedFiles(conf);
+ //Path[] localFiles = HadoopUtil.getCachedFiles(conf);
maxDf = conf.getLong(HighDFWordsPruner.MAX_DF, Long.MAX_VALUE);
minDf = conf.getLong(HighDFWordsPruner.MIN_DF, -1);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJobTest.java?rev=1495738&r1=1495737&r2=1495738&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJobTest.java Sat Jun 22 14:20:36 2013
@@ -234,7 +234,7 @@ public class ParallelALSFactorizationJob
@Test
public void exampleWithIDMapping() throws Exception {
- String[] preferencesWithLongIDs = new String[] {
+ String[] preferencesWithLongIDs = {
"5568227754922264005,-4758971626494767444,5.0",
"5568227754922264005,3688396615879561990,5.0",
"5568227754922264005,4594226737871995304,2.0",
@@ -318,7 +318,7 @@ public class ParallelALSFactorizationJob
@Test
public void recommenderJobWithIDMapping() throws Exception {
- String[] preferencesWithLongIDs = new String[] {
+ String[] preferencesWithLongIDs = {
"5568227754922264005,-4758971626494767444,5.0",
"5568227754922264005,3688396615879561990,5.0",
"5568227754922264005,4594226737871995304,2.0",
@@ -349,7 +349,7 @@ public class ParallelALSFactorizationJob
"--numThreadsPerSolver", String.valueOf(1),
"--usesLongIDs", String.valueOf(true) });
- assertEquals(success, 0);
+ assertEquals(0, success);
// reset as we run in the same JVM
SharingMapper.reset();