You are viewing a plain-text version of this content. The canonical version is available in the mailing list archive.
Posted to commits@mahout.apache.org by sr...@apache.org on 2012/06/20 14:07:58 UTC
svn commit: r1352052 [1/7] - in /mahout/trunk: ./ buildtools/
buildtools/src/main/resources/ core/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/cf/t...
Author: srowen
Date: Wed Jun 20 12:07:50 2012
New Revision: 1352052
URL: http://svn.apache.org/viewvc?rev=1352052&view=rev
Log:
Huge attack on static-analysis bugs and on PMD, Checkstyle, and FindBugs warnings
Modified:
mahout/trunk/buildtools/pom.xml
mahout/trunk/buildtools/src/main/resources/findbugs-exclude.xml
mahout/trunk/core/pom.xml
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/TreeVisualizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/UDistrib.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/evaluation/Auc.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/BaumWelchTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/RandomSequenceGenerator.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/ViterbiEvaluator.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AbstractOnlineLogisticRegression.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegression.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/AbstractCluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationConfigKeys.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/ClusterClassificationMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/classify/WeightedPropertyVectorWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/DirichletDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/DistributionDescription.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/dirichlet/models/GaussianCluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CIReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/CanopyClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusterIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/ClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/DirichletClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/FuzzyKMeansClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/KMeansClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/iterator/MeanShiftClusteringPolicy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/Kluster.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/topdown/postprocessor/ClusterOutputPostProcessor.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/CommandLineUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
mahout/trunk/core/src/main/java/org/apache/mahout/driver/MahoutDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowth.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/convertors/TransactionIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth/FrequentPatternMaxHeap.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthIds.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPGrowthObj.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/fpgrowth2/FPTree.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixColumnMeansJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/LoglikelihoodSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/solver/DistributedConjugateGradientSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/VarianceTotals.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/ABtJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/BtJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/DenseBlockWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/Omega.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SplitPartitionedWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/qr/GramSchmidt.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/CalculateEntropyReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/DoubleSumReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/InformationGain.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DocumentProcessor.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/HighDFWordsPruner.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SimpleTextEncodingVectorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/Vectorizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/CollocDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/collocations/llr/Gram.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/common/PartialVectorMerger.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/AdaptiveWordValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizerTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/Step1MapperTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/classify/ClusterClassificationDriverTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/iterator/TestClusterClassifier.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/kmeans/TestKmeansClustering.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/cvb/TestCVBModelTrainer.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyStatusReporter.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/StringUtilsTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthRetailDataTestVs.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/FPGrowthSyntheticDataTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTestVs.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthTest2.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/GramKeyGroupComparatorTest.java
mahout/trunk/distribution/pom.xml
mahout/trunk/examples/pom.xml
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/BuildForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/df/mapreduce/TestForest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticModelParameters.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunAdaptiveLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/RunLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestASFEmail.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TestNewsGroups.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainASFEmail.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainAdaptiveLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/TrainNewsGroups.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayDirichlet.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayFuzzyKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayMinHash.java
mahout/trunk/integration/pom.xml
mahout/trunk/integration/src/main/java/org/apache/mahout/classifier/ConfusionMatrixDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaXmlSplitter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInputJob.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/GraphMLClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java
mahout/trunk/integration/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java
mahout/trunk/math/pom.xml
mahout/trunk/math/src/main/java/org/apache/mahout/collections/Arithmetic.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/Arrays.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/PersistentObject.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/Sorting.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/SparseColumnMatrix.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/als/AlternatingLeastSquaresSolver.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/als/ImplicitFeedbackAlternatingLeastSquaresSolver.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/list/AbstractList.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/list/ObjectArrayList.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/list/SimpleLongArrayList.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/map/PrimeFinder.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/set/OpenHashSet.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/solver/EigenDecomposition.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/solver/LSMR.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestMatrixView.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/als/AlternatingLeastSquaresSolverTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/solver/EigenDecompositionTest.java
mahout/trunk/pom.xml
Modified: mahout/trunk/buildtools/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/buildtools/pom.xml (original)
+++ mahout/trunk/buildtools/pom.xml Wed Jun 20 12:07:50 2012
@@ -44,7 +44,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-eclipse-plugin</artifactId>
- <version>2.8</version>
+ <version>2.9</version>
<inherited>false</inherited>
<executions>
<execution>
@@ -63,6 +63,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
+ <version>1.7</version>
<inherited>false</inherited>
<dependencies>
<dependency>
Modified: mahout/trunk/buildtools/src/main/resources/findbugs-exclude.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/buildtools/src/main/resources/findbugs-exclude.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/buildtools/src/main/resources/findbugs-exclude.xml (original)
+++ mahout/trunk/buildtools/src/main/resources/findbugs-exclude.xml Wed Jun 20 12:07:50 2012
@@ -22,6 +22,9 @@
<Bug pattern="MS_PKGPROTECT"/>
</Match>
<Match>
+ <Bug pattern="NP_PARAMETER_MUST_BE_NONNULL_BUT_MARKED_AS_NULLABLE"/>
+ </Match>
+ <Match>
<Bug pattern="RV_RETURN_VALUE_IGNORED"/>
</Match>
<Match>
Modified: mahout/trunk/core/pom.xml
URL: http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Wed Jun 20 12:07:50 2012
@@ -39,6 +39,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
+ <version>2.4</version>
<configuration>
<encoding>UTF-8</encoding>
<source>1.6</source>
@@ -49,6 +50,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
+ <version>1.7</version>
<executions>
<execution>
<phase>compile</phase>
@@ -69,6 +71,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
+ <version>2.4</version>
<executions>
<execution>
<goals>
@@ -82,6 +85,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
+ <version>2.3</version>
<executions>
<execution>
<id>job</id>
@@ -109,6 +113,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-remote-resources-plugin</artifactId>
+ <version>1.3</version>
<configuration>
<appendedResourcesDirectory>../src/main/appended-resources</appendedResourcesDirectory>
<resourceBundles>
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Wed Jun 20 12:07:50 2012
@@ -38,7 +38,8 @@ public final class TasteHadoopUtils {
/** Standard delimiter of textual preference data */
private static final Pattern PREFERENCE_TOKEN_DELIMITER = Pattern.compile("[\t,]");
- private TasteHadoopUtils() {}
+ private TasteHadoopUtils() {
+ }
/**
* Splits a preference data line into string tokens
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ALSUtils.java Wed Jun 20 12:07:50 2012
@@ -34,7 +34,8 @@ import java.util.Iterator;
final class ALSUtils {
- private ALSUtils() {}
+ private ALSUtils() {
+ }
static Vector readFirstRow(Path dir, Configuration conf) throws IOException {
Iterator<VectorWritable> iterator = new SequenceFileDirValueIterator<VectorWritable>(dir,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/DatasetSplitter.java Wed Jun 20 12:07:50 2012
@@ -32,8 +32,6 @@ import org.apache.mahout.common.Abstract
import org.apache.mahout.common.RandomUtils;
import java.io.IOException;
-import java.util.List;
-import java.util.Map;
import java.util.Random;
/**
@@ -74,7 +72,6 @@ public class DatasetSplitter extends Abs
addOption("probePercentage", "p", "percentage of the data to use as probe set (default: "
+ DEFAULT_PROBE_PERCENTAGE + ')', String.valueOf(DEFAULT_PROBE_PERCENTAGE));
- Map<String, List<String>> parsedArgs = parseArguments(args);
double trainingPercentage = Double.parseDouble(getOption("trainingPercentage"));
double probePercentage = Double.parseDouble(getOption("probePercentage"));
String tempDir = getOption("tempDir");
@@ -88,22 +85,25 @@ public class DatasetSplitter extends Abs
markPreferences.getConfiguration().set(TRAINING_PERCENTAGE, String.valueOf(trainingPercentage));
markPreferences.getConfiguration().set(PROBE_PERCENTAGE, String.valueOf(probePercentage));
boolean succeeded = markPreferences.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
Job createTrainingSet = prepareJob(markedPrefs, trainingSetPath, SequenceFileInputFormat.class,
WritePrefsMapper.class, NullWritable.class, Text.class, TextOutputFormat.class);
createTrainingSet.getConfiguration().set(PART_TO_USE, INTO_TRAINING_SET.toString());
succeeded = createTrainingSet.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
Job createProbeSet = prepareJob(markedPrefs, probeSetPath, SequenceFileInputFormat.class,
WritePrefsMapper.class, NullWritable.class, Text.class, TextOutputFormat.class);
createProbeSet.getConfiguration().set(PART_TO_USE, INTO_PROBE_SET.toString());
succeeded = createProbeSet.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
return 0;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/FactorizationEvaluator.java Wed Jun 20 12:07:50 2012
@@ -17,6 +17,7 @@
package org.apache.mahout.cf.taste.hadoop.als;
+import com.google.common.base.Charsets;
import com.google.common.io.Closeables;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -89,15 +90,16 @@ public class FactorizationEvaluator exte
predictRatings.getConfiguration().set(USER_FEATURES_PATH, getOption("userFeatures"));
predictRatings.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
boolean succeeded = predictRatings.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
BufferedWriter writer = null;
try {
FileSystem fs = FileSystem.get(getOutputPath().toUri(), getConf());
FSDataOutputStream outputStream = fs.create(getOutputPath("rmse.txt"));
double rmse = computeRmse(errors);
- writer = new BufferedWriter(new OutputStreamWriter(outputStream));
+ writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8));
writer.write(String.valueOf(rmse));
} finally {
Closeables.closeQuietly(writer);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Wed Jun 20 12:07:50 2012
@@ -135,8 +135,9 @@ public class ParallelALSFactorizationJob
VectorWritable.class, SequenceFileOutputFormat.class);
itemRatings.setCombinerClass(VectorSumReducer.class);
boolean succeeded = itemRatings.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
/* create A */
Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(),
@@ -144,8 +145,9 @@ public class ParallelALSFactorizationJob
VectorWritable.class);
userRatings.setCombinerClass(MergeVectorsCombiner.class);
succeeded = userRatings.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
//TODO this could be fiddled into one of the upper jobs
Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
@@ -153,8 +155,9 @@ public class ParallelALSFactorizationJob
IntWritable.class, VectorWritable.class);
averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
succeeded = averageItemRatings.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
Vector averageRatings = ALSUtils.readFirstRow(getTempPath("averageRatings"), getConf());
@@ -226,24 +229,21 @@ public class ParallelALSFactorizationJob
solverConf.setInt(NUM_FEATURES, numFeatures);
solverConf.set(FEATURE_MATRIX, pathToUorI.toString());
boolean succeeded = solverForUorI.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
throw new IllegalStateException("Job failed!");
+ }
}
static class SolveExplicitFeedbackMapper extends Mapper<IntWritable,VectorWritable,IntWritable,VectorWritable> {
private double lambda;
private int numFeatures;
-
private OpenIntObjectHashMap<Vector> UorM;
- private AlternatingLeastSquaresSolver solver;
-
@Override
protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
lambda = Double.parseDouble(ctx.getConfiguration().get(LAMBDA));
numFeatures = ctx.getConfiguration().getInt(NUM_FEATURES, -1);
- solver = new AlternatingLeastSquaresSolver();
Path UOrIPath = new Path(ctx.getConfiguration().get(FEATURE_MATRIX));
@@ -262,7 +262,7 @@ public class ParallelALSFactorizationJob
featureVectors.add(UorM.get(index));
}
- Vector uiOrmj = solver.solve(featureVectors, ratings, lambda, numFeatures);
+ Vector uiOrmj = AlternatingLeastSquaresSolver.solve(featureVectors, ratings, lambda, numFeatures);
ctx.write(userOrItemID, new VectorWritable(uiOrmj));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/RecommenderJob.java Wed Jun 20 12:07:50 2012
@@ -93,8 +93,9 @@ public class RecommenderJob extends Abst
prediction.getConfiguration().set(ITEM_FEATURES_PATH, getOption("itemFeatures"));
prediction.getConfiguration().set(MAX_RATING, getOption("maxRating"));
boolean succeeded = prediction.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
return 0;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Wed Jun 20 12:07:50 2012
@@ -182,7 +182,7 @@ public final class AggregateAndRecommend
* find the top entries in recommendationVector, map them to the real itemIDs and write back the result
*/
private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
TopK<RecommendedItem> topKItems = new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Wed Jun 20 12:07:50 2012
@@ -72,10 +72,12 @@ import java.util.regex.Pattern;
* recommendations for that user (optional)</li>
* <li>--numRecommendations (integer): Number of recommendations to compute per user (10)</li>
* <li>--booleanData (boolean): Treat input data as having no pref values (false)</li>
- * <li>--maxPrefsPerUser (integer): Maximum number of preferences considered per user in final recommendation phase (10)</li>
+ * <li>--maxPrefsPerUser (integer): Maximum number of preferences considered per user in final
+ * recommendation phase (10)</li>
* <li>--maxSimilaritiesPerItem (integer): Maximum number of similarities considered per item (100)</li>
* <li>--minPrefsPerUser (integer): ignore users with less preferences than this in the similarity computation (1)</li>
- * <li>--maxPrefsPerUserInItemSimilarity (integer): max number of preferences to consider per user in the item similarity computation phase,
+ * <li>--maxPrefsPerUserInItemSimilarity (integer): max number of preferences to consider per user in
+ * the item similarity computation phase,
* users with more preferences will be sampled down (1000)</li>
* <li>--threshold (double): discard item pairs with a similarity value below this</li>
* </ol>
@@ -152,12 +154,13 @@ public final class RecommenderJob extend
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[]{
- "--input", getInputPath().toString(),
- "--output", prepPath.toString(),
- "--maxPrefsPerUser", String.valueOf(maxPrefsPerUserInItemSimilarity),
- "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
- "--booleanData", String.valueOf(booleanData),
- "--tempDir", getTempPath().toString()});
+ "--input", getInputPath().toString(),
+ "--output", prepPath.toString(),
+ "--maxPrefsPerUser", String.valueOf(maxPrefsPerUserInItemSimilarity),
+ "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
+ "--booleanData", String.valueOf(booleanData),
+ "--tempDir", getTempPath().toString(),
+ });
numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
}
@@ -175,14 +178,15 @@ public final class RecommenderJob extend
* new DistributedRowMatrix(...).rowSimilarity(...) */
//calculate the co-occurrence matrix
ToolRunner.run(getConf(), new RowSimilarityJob(), new String[]{
- "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
- "--output", similarityMatrixPath.toString(),
- "--numberOfColumns", String.valueOf(numberOfUsers),
- "--similarityClassname", similarityClassname,
- "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem),
- "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
- "--threshold", String.valueOf(threshold),
- "--tempDir", getTempPath().toString()});
+ "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
+ "--output", similarityMatrixPath.toString(),
+ "--numberOfColumns", String.valueOf(numberOfUsers),
+ "--similarityClassname", similarityClassname,
+ "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem),
+ "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
+ "--threshold", String.valueOf(threshold),
+ "--tempDir", getTempPath().toString(),
+ });
}
//start the multiplication of the co-occurrence matrix by the user vectors
@@ -193,21 +197,29 @@ public final class RecommenderJob extend
Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
SequenceFileOutputFormat.class);
boolean succeeded = prePartialMultiply1.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
//continue the multiplication
Job prePartialMultiply2 = prepareJob(new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
- prePartialMultiplyPath2, SequenceFileInputFormat.class, UserVectorSplitterMapper.class, VarIntWritable.class,
- VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
- SequenceFileOutputFormat.class);
+ prePartialMultiplyPath2,
+ SequenceFileInputFormat.class,
+ UserVectorSplitterMapper.class,
+ VarIntWritable.class,
+ VectorOrPrefWritable.class,
+ Reducer.class,
+ VarIntWritable.class,
+ VectorOrPrefWritable.class,
+ SequenceFileOutputFormat.class);
if (usersFile != null) {
prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE, usersFile);
}
prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
maxPrefsPerUser);
succeeded = prePartialMultiply2.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
//finish the job
Job partialMultiply = prepareJob(
new Path(prePartialMultiplyPath1 + "," + prePartialMultiplyPath2), partialMultiplyPath,
@@ -216,8 +228,9 @@ public final class RecommenderJob extend
SequenceFileOutputFormat.class);
setS3SafeCombinedInputPath(partialMultiply, getTempPath(), prePartialMultiplyPath1, prePartialMultiplyPath2);
succeeded = partialMultiply.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
}
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -229,8 +242,9 @@ public final class RecommenderJob extend
ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
SequenceFileOutputFormat.class);
boolean succeeded = itemFiltering.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
}
String aggregateAndRecommendInput = partialMultiplyPath.toString();
@@ -257,8 +271,9 @@ public final class RecommenderJob extend
aggregateAndRecommendConf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, numRecommendations);
aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);
boolean succeeded = aggregateAndRecommend.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
}
return 0;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/PreparePreferenceMatrixJob.java Wed Jun 20 12:07:50 2012
@@ -81,9 +81,16 @@ public class PreparePreferenceMatrixJob
return -1;
}
//convert user preferences into a vector per user
- Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class,
- ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? VarLongWritable.class : EntityPrefWritable.class,
- ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
+ Job toUserVectors = prepareJob(getInputPath(),
+ getOutputPath(USER_VECTORS),
+ TextInputFormat.class,
+ ToItemPrefsMapper.class,
+ VarLongWritable.class,
+ booleanData ? VarLongWritable.class : EntityPrefWritable.class,
+ ToUserVectorsReducer.class,
+ VarLongWritable.class,
+ VectorWritable.class,
+ SequenceFileOutputFormat.class);
toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData);
toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java Wed Jun 20 12:07:50 2012
@@ -47,7 +47,7 @@ public class ToItemVectorsMapper
@Override
protected void map(VarLongWritable rowIndex, VectorWritable vectorWritable, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Vector userRatings = vectorWritable.get();
int numElementsBeforeSampling = userRatings.getNumNondefaultElements();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java Wed Jun 20 12:07:50 2012
@@ -27,7 +27,7 @@ public class ToItemVectorsReducer extend
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> vectors, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
VectorWritable vectorWritable = VectorWritable.merge(vectors.iterator());
vectorWritable.setWritesLaxPrecision(true);
ctx.write(row, vectorWritable);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Wed Jun 20 12:07:50 2012
@@ -129,13 +129,14 @@ public final class ItemSimilarityJob ext
AtomicInteger currentPhase = new AtomicInteger();
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[]{
- "--input", getInputPath().toString(),
- "--output", prepPath.toString(),
- "--maxPrefsPerUser", String.valueOf(maxPrefsPerUser),
- "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
- "--booleanData", String.valueOf(booleanData),
- "--tempDir", getTempPath().toString() });
+ ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[] {
+ "--input", getInputPath().toString(),
+ "--output", prepPath.toString(),
+ "--maxPrefsPerUser", String.valueOf(maxPrefsPerUser),
+ "--minPrefsPerUser", String.valueOf(minPrefsPerUser),
+ "--booleanData", String.valueOf(booleanData),
+ "--tempDir", getTempPath().toString(),
+ });
}
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -143,14 +144,15 @@ public final class ItemSimilarityJob ext
getConf());
ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
- "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
- "--output", similarityMatrixPath.toString(),
- "--numberOfColumns", String.valueOf(numberOfUsers),
- "--similarityClassname", similarityClassName,
- "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem),
- "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
- "--threshold", String.valueOf(threshold),
- "--tempDir", getTempPath().toString() });
+ "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
+ "--output", similarityMatrixPath.toString(),
+ "--numberOfColumns", String.valueOf(numberOfUsers),
+ "--similarityClassname", similarityClassName,
+ "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem),
+ "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
+ "--threshold", String.valueOf(threshold),
+ "--tempDir", getTempPath().toString(),
+ });
}
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -217,7 +219,7 @@ public final class ItemSimilarityJob ext
extends Reducer<EntityEntityWritable,DoubleWritable,EntityEntityWritable,DoubleWritable> {
@Override
protected void reduce(EntityEntityWritable pair, Iterable<DoubleWritable> values, Context ctx)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
ctx.write(pair, values.iterator().next());
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java Wed Jun 20 12:07:50 2012
@@ -39,11 +39,8 @@ public final class FullRunningAverageAnd
@Override
public String toString() {
- return new StringBuilder()
- .append(average.getAverage()).append('\t')
- .append(average.getCount()).append('\t')
- .append(average.getMk()).append('\t')
- .append(average.getSk()).toString();
+ return String.valueOf(average.getAverage())
+ + '\t' + average.getCount() + '\t' + average.getMk() + '\t' + average.getSk();
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Wed Jun 20 12:07:50 2012
@@ -70,8 +70,9 @@ public final class SlopeOneAverageDiffsJ
FloatWritable.class,
SequenceFileOutputFormat.class);
boolean succeeded = prefsToDiffsJob.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
}
@@ -88,8 +89,9 @@ public final class SlopeOneAverageDiffsJ
TextOutputFormat.class);
FileOutputFormat.setOutputCompressorClass(diffsToAveragesJob, GzipCodec.class);
boolean succeeded = diffsToAveragesJob.waitForCompletion(true);
- if (!succeeded)
+ if (!succeeded) {
return -1;
+ }
}
return 0;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastByIDMap.java Wed Jun 20 12:07:50 2012
@@ -175,9 +175,7 @@ public final class FastByIDMap<V> implem
public V put(long key, V value) {
Preconditions.checkArgument(key != NULL && key != REMOVED);
- if (value == null) {
- throw new NullPointerException();
- }
+ Preconditions.checkNotNull(value);
// If less than half the slots are open, let's clear it up
if (numSlotsUsed * loadFactor >= keys.length) {
// If over half the slots used are actual entries, let's grow
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastIDSet.java Wed Jun 20 12:07:50 2012
@@ -64,7 +64,8 @@ public final class FastIDSet implements
Preconditions.checkArgument(size < max, "size must be less than %d", max);
int hashSize = RandomUtils.nextTwinPrime((int) (loadFactor * size));
keys = new long[hashSize];
- Arrays.fill(keys, NULL); }
+ Arrays.fill(keys, NULL);
+ }
/**
* @see #findForAdd(long)
@@ -253,9 +254,7 @@ public final class FastIDSet implements
numSlotsUsed = 0;
keys = new long[newHashSize];
Arrays.fill(keys, NULL);
- int length = oldKeys.length;
- for (int i = 0; i < length; i++) {
- long key = oldKeys[i];
+ for (long key : oldKeys) {
if (key != NULL && key != REMOVED) {
add(key);
}
@@ -295,9 +294,7 @@ public final class FastIDSet implements
public int hashCode() {
int hash = 0;
long[] keys = this.keys;
- int max = keys.length;
- for (int i = 0; i < max; i++) {
- long key = keys[i];
+ for (long key : keys) {
if (key != NULL && key != REMOVED) {
hash = 31 * hash + ((int) (key >> 32) ^ (int) key);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Wed Jun 20 12:07:50 2012
@@ -178,9 +178,8 @@ public final class FastMap<K,V> implemen
*/
@Override
public V put(K key, V value) {
- if (key == null || value == null) {
- throw new NullPointerException();
- }
+ Preconditions.checkNotNull(key);
+ Preconditions.checkNotNull(value);
// If less than half the slots are open, let's clear it up
if (numSlotsUsed * loadFactor >= keys.length) {
// If over half the slots used are actual entries, let's grow
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Wed Jun 20 12:07:50 2012
@@ -77,8 +77,8 @@ public final class GenericRecommenderIRS
public GenericRecommenderIRStatsEvaluator(RelevantItemsDataSplitter dataSplitter) {
Preconditions.checkNotNull(dataSplitter);
- random = RandomUtils.getRandom();
- this.dataSplitter = dataSplitter;
+ random = RandomUtils.getRandom();
+ this.dataSplitter = dataSplitter;
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRelevantItemsDataSplitter.java Wed Jun 20 12:07:50 2012
@@ -66,7 +66,7 @@ public final class GenericRelevantItemsD
for (Preference pref : prefs2Array) {
prefs2.add(pref);
}
- for (Iterator<Preference> iterator = prefs2.iterator(); iterator.hasNext(); ) {
+ for (Iterator<Preference> iterator = prefs2.iterator(); iterator.hasNext();) {
Preference pref = iterator.next();
if (relevantItemIDs.contains(pref.getItemID())) {
iterator.remove();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java Wed Jun 20 12:07:50 2012
@@ -348,8 +348,8 @@ public final class OrderBasedRecommender
private static double getMeanRank(int[] ranks) {
int nitems = ranks.length;
double sum = 0.0;
- for (int i = 0; i < nitems; i++) {
- sum += ranks[i];
+ for (int rank : ranks) {
+ sum += rank;
}
return sum / nitems;
}
@@ -357,9 +357,9 @@ public final class OrderBasedRecommender
private static double getMeanWplus(double[] ranks) {
int nitems = ranks.length;
double sum = 0.0;
- for (int i = 0; i < nitems; i++) {
- if (ranks[i] > 0) {
- sum += ranks[i];
+ for (double rank : ranks) {
+ if (rank > 0) {
+ sum += rank;
}
}
return sum / nitems;
@@ -368,9 +368,9 @@ public final class OrderBasedRecommender
private static double getMeanWminus(double[] ranks) {
int nitems = ranks.length;
double sum = 0.0;
- for (int i = 0; i < nitems; i++) {
- if (ranks[i] < 0) {
- sum -= ranks[i];
+ for (double rank : ranks) {
+ if (rank < 0) {
+ sum -= rank;
}
}
return sum / nitems;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java Wed Jun 20 12:07:50 2012
@@ -275,9 +275,9 @@ public final class GenericBooleanPrefDat
if (userIDs2 == null) {
return 0;
}
- return userIDs1.size() < userIDs2.size() ?
- userIDs2.intersectionSize(userIDs1) :
- userIDs1.intersectionSize(userIDs2);
+ return userIDs1.size() < userIDs2.size()
+ ? userIDs2.intersectionSize(userIDs1)
+ : userIDs1.intersectionSize(userIDs2);
}
@Override
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java Wed Jun 20 12:07:50 2012
@@ -103,15 +103,15 @@ public final class KnnItemBasedRecommend
int j = 0;
double value = 0.0;
for (long jitem : itemNeighborhood) {
- if (jitem == -1) {
- continue;
- }
- for (long user : usersRatedNeighborhood) {
- float prefVJ = dataModel.getPreferenceValue(user, iitem);
- float prefVK = dataModel.getPreferenceValue(user, jitem);
+ if (jitem == -1) {
+ continue;
+ }
+ for (long user : usersRatedNeighborhood) {
+ float prefVJ = dataModel.getPreferenceValue(user, iitem);
+ float prefVK = dataModel.getPreferenceValue(user, jitem);
value += prefVJ * prefVK;
}
- aMatrix[i][j] = value/numUsers;
+ aMatrix[i][j] = value / numUsers;
j++;
}
i++;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java Wed Jun 20 12:07:50 2012
@@ -28,14 +28,12 @@ import java.util.concurrent.locks.Reentr
import java.util.regex.Pattern;
import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.apache.mahout.common.iterator.FileLineIterator;
@@ -56,7 +54,8 @@ import com.google.common.base.Preconditi
* The fourth column is optional, and is a count representing the number of occurrences of the item-item pair
* that contribute to the diff. It is assumed to be 1 if not present. The fifth and sixth arguments are
* computed values used by {@link FullRunningAverageAndStdDev} implementations to compute a running standard deviation.
- * They are required if using {@link Weighting#WEIGHTED} with {@link SlopeOneRecommender}.
+ * They are required if using {@link org.apache.mahout.cf.taste.common.Weighting#WEIGHTED}
+ * with {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}.
* </p>
*
* <p>
@@ -142,7 +141,7 @@ public final class FileDiffStorage imple
}
String[] tokens = SEPARATOR.split(line);
- Preconditions.checkArgument(tokens.length >=3 && tokens.length != 5, "Bad line: %s", line);
+ Preconditions.checkArgument(tokens.length >= 3 && tokens.length != 5, "Bad line: %s", line);
long itemID1 = Long.parseLong(tokens[0]);
long itemID2 = Long.parseLong(tokens[1]);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ALSWRFactorizer.java Wed Jun 20 12:07:50 2012
@@ -133,7 +133,6 @@ public class ALSWRFactorizer extends Abs
@Override
public Factorization factorize() throws TasteException {
log.info("starting to compute the factorization...");
- final AlternatingLeastSquaresSolver solver = new AlternatingLeastSquaresSolver();
final Features features = new Features(this);
for (int iteration = 0; iteration < numIterations; iteration++) {
@@ -155,7 +154,8 @@ public class ALSWRFactorizer extends Abs
long itemID = itemIDsFromUser.nextLong();
featureVectors.add(features.getItemFeatureColumn(itemIndex(itemID)));
}
- Vector userFeatures = solver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);
+ Vector userFeatures =
+ AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(userPrefs), lambda, numFeatures);
features.setFeatureColumnInU(userIndex(userID), userFeatures);
}
});
@@ -184,7 +184,8 @@ public class ALSWRFactorizer extends Abs
long userID = pref.getUserID();
featureVectors.add(features.getUserFeatureColumn(userIndex(userID)));
}
- Vector itemFeatures = solver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);
+ Vector itemFeatures =
+ AlternatingLeastSquaresSolver.solve(featureVectors, ratingVector(itemPrefs), lambda, numFeatures);
features.setFeatureColumnInM(itemIndex(itemID), itemFeatures);
}
});
@@ -203,11 +204,11 @@ public class ALSWRFactorizer extends Abs
return createFactorization(features.getU(), features.getM());
}
- protected ExecutorService createQueue() {
+ protected static ExecutorService createQueue() {
return Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
}
- protected Vector ratingVector(PreferenceArray prefs) {
+ protected static Vector ratingVector(PreferenceArray prefs) {
double[] ratings = new double[prefs.length()];
for (int n = 0; n < prefs.length(); n++) {
ratings[n] = prefs.get(n).getValue();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/svd/ImplicitLinearRegressionFactorizer.java Wed Jun 20 12:07:50 2012
@@ -60,7 +60,7 @@ public final class ImplicitLinearRegress
private double[][] itemMatrix;
private Matrix userTransUser;
private Matrix itemTransItem;
- Collection<Callable<Void>> fVectorCallables;
+ private Collection<Callable<Void>> fVectorCallables;
private boolean recomputeUserFeatures;
private RunningAverage avrChange;
@@ -177,13 +177,12 @@ public final class ImplicitLinearRegress
return ids;
}
- private Matrix ones(int size) {
+ private static Matrix ones(int size) {
double[] vector = new double[size];
for (int i = 0; i < size; i++) {
vector[i] = 1;
}
- Matrix ones = new DiagonalMatrix(vector);
- return ones;
+ return new DiagonalMatrix(vector);
}
private double getAveragePreference() throws TasteException {
@@ -191,9 +190,8 @@ public final class ImplicitLinearRegress
LongPrimitiveIterator it = dataModel.getUserIDs();
while (it.hasNext()) {
int count = 0;
- PreferenceArray prefs;
try {
- prefs = dataModel.getPreferencesFromUser(it.nextLong());
+ PreferenceArray prefs = dataModel.getPreferencesFromUser(it.nextLong());
for (Preference pref : prefs) {
average.addDatum(pref.getValue());
count++;
@@ -241,7 +239,7 @@ public final class ImplicitLinearRegress
}
}
/* calculating cosine similarity to determine when to stop the algorithm, this could be used to detect convergence */
- double cosine = (aTb) / (Math.sqrt(normA) * Math.sqrt(normB));
+ double cosine = aTb / (Math.sqrt(normA) * Math.sqrt(normB));
if (Double.isNaN(cosine)) {
log.info("Cosine similarity is NaN, recomputeUserFeatures=" + recomputeUserFeatures + " id=" + id);
} else {
@@ -258,7 +256,7 @@ public final class ImplicitLinearRegress
avrChange = new FullRunningAverage();
}
- public void buildCallables(Matrix C, Matrix prefVector, int id) throws TasteException {
+ public void buildCallables(Matrix C, Matrix prefVector, int id) {
fVectorCallables.add(new FeatureVectorCallable(C, prefVector, id));
if (fVectorCallables.size() % (200 * Runtime.getRuntime().availableProcessors()) == 0) {
execute(fVectorCallables);
@@ -266,7 +264,7 @@ public final class ImplicitLinearRegress
}
}
- public void finishProcessing() throws TasteException {
+ public void finishProcessing() {
/* run the remaining part */
if (fVectorCallables != null) {
execute(fVectorCallables);
@@ -280,11 +278,11 @@ public final class ImplicitLinearRegress
recomputeUserFeatures = !recomputeUserFeatures;
}
- public Matrix identityV(int size) {
+ public static Matrix identityV(int size) {
return ones(size);
}
- void execute(Collection<Callable<Void>> callables) throws TasteException {
+ static void execute(Collection<Callable<Void>> callables) {
callables = wrapWithStatsCallables(callables);
int numProcessors = Runtime.getRuntime().availableProcessors();
ExecutorService executor = Executors.newFixedThreadPool(numProcessors);
@@ -303,7 +301,7 @@ public final class ImplicitLinearRegress
executor.shutdown();
}
- private Collection<Callable<Void>> wrapWithStatsCallables(Collection<Callable<Void>> callables) {
+ private static Collection<Callable<Void>> wrapWithStatsCallables(Collection<Callable<Void>> callables) {
int size = callables.size();
Collection<Callable<Void>> wrapped = Lists.newArrayListWithExpectedSize(size);
int count = 1;
@@ -355,7 +353,7 @@ public final class ImplicitLinearRegress
}
}
- private Matrix solve(Matrix A, Matrix y) {
+ private static Matrix solve(Matrix A, Matrix y) {
return new QRDecomposition(A).solve(y);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java Wed Jun 20 12:07:50 2012
@@ -17,21 +17,11 @@
package org.apache.mahout.classifier;
-import java.util.Comparator;
-
/**
* Result of a document classification. The label and the associated score (usually probabilty)
*/
public class ClassifierResult {
- public static final Comparator<ClassifierResult> COMPARE_BY_SCORE_AND_LABEL =
- new Comparator<ClassifierResult>() {
- @Override
- public int compare(ClassifierResult cr1, ClassifierResult cr2) {
- return cr1.score < cr2.score ? 1 : cr1.score > cr2.score ? -1 : cr1.label.compareTo(cr2.label);
- }
- };
-
private String label;
private double score;
private double logLikelihood = Double.MAX_VALUE;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Wed Jun 20 12:07:50 2012
@@ -179,10 +179,9 @@ public class ConfusionMatrix {
}
private static String[] sortLabels(Map<String,Integer> labels) {
- String[] sorted = new String[labels.keySet().size()];
- for (String label: labels.keySet()) {
- Integer index = labels.get(label);
- sorted[index] = label;
+ String[] sorted = new String[labels.size()];
+ for (Map.Entry<String,Integer> entry : labels.entrySet()) {
+ sorted[entry.getValue()] = entry.getKey();
}
return sorted;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Wed Jun 20 12:07:50 2012
@@ -31,8 +31,7 @@ public class ResultAnalyzer {
private final ConfusionMatrix confusionMatrix;
private final OnlineSummarizer summarizer;
- private boolean hasLL = false;
-
+ private boolean hasLL;
/*
* === Summary ===
*
@@ -41,7 +40,6 @@ public class ResultAnalyzer {
* relative squared error 37.2742 % Total Number of Instances 683
*/
private int correctlyClassified;
-
private int incorrectlyClassified;
public ResultAnalyzer(Collection<String> labelSet, String defaultLabel) {
@@ -101,8 +99,8 @@ public class ResultAnalyzer {
returnString.append(confusionMatrix);
if (hasLL) {
returnString.append("\n\n");
- returnString.append("Avg. Log-likelihood: ").append(summarizer.getMean()).append(" 25%-ile: ").append(summarizer.getQuartile(1))
- .append(" 75%-ile: ").append(summarizer.getQuartile(2));
+ returnString.append("Avg. Log-likelihood: ").append(summarizer.getMean()).append(" 25%-ile: ")
+ .append(summarizer.getQuartile(1)).append(" 75%-ile: ").append(summarizer.getQuartile(2));
}
return returnString.toString();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/data/DataLoader.java Wed Jun 20 12:07:50 2012
@@ -114,7 +114,7 @@ public final class DataLoader {
public static Data loadData(Dataset dataset, FileSystem fs, Path fpath) throws IOException {
FSDataInputStream input = fs.open(fpath);
- Scanner scanner = new Scanner(input);
+ Scanner scanner = new Scanner(input, "UTF-8");
List<Instance> instances = Lists.newArrayList();
@@ -188,7 +188,7 @@ public final class DataLoader {
Attribute[] attrs = DescriptorUtils.parseDescriptor(descriptor);
FSDataInputStream input = fs.open(path);
- Scanner scanner = new Scanner(input);
+ Scanner scanner = new Scanner(input, "UTF-8");
// used to convert CATEGORICAL attribute to Integer
@SuppressWarnings("unchecked")
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/mapreduce/inmem/InMemInputFormat.java Wed Jun 20 12:07:50 2012
@@ -64,9 +64,8 @@ public class InMemInputFormat extends In
}
@Override
- public RecordReader<IntWritable,NullWritable> createRecordReader(InputSplit split,
- TaskAttemptContext context) throws IOException,
- InterruptedException {
+ public RecordReader<IntWritable,NullWritable> createRecordReader(InputSplit split, TaskAttemptContext context)
+ throws IOException, InterruptedException {
Preconditions.checkArgument(split instanceof InMemInputSplit);
return new InMemRecordReader((InMemInputSplit) split);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/split/RegressionSplit.java Wed Jun 20 12:07:50 2012
@@ -20,7 +20,9 @@ package org.apache.mahout.classifier.df.
import org.apache.mahout.classifier.df.data.Data;
import org.apache.mahout.classifier.df.data.Instance;
+import java.io.Serializable;
import java.util.Arrays;
+import java.util.Comparator;
/**
* Regression problem implementation of IgSplit.
@@ -31,7 +33,7 @@ public class RegressionSplit extends IgS
/**
* Comparator for Instance sort
*/
- private static class InstanceComparator implements java.util.Comparator<Instance> {
+ private static class InstanceComparator implements Comparator<Instance>, Serializable {
private final int attr;
InstanceComparator(int attr) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/ForestVisualizer.java Wed Jun 20 12:07:50 2012
@@ -17,7 +17,10 @@
package org.apache.mahout.classifier.df.tools;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
+import java.util.Collection;
import java.util.List;
import org.apache.commons.cli2.CommandLine;
@@ -46,17 +49,25 @@ public final class ForestVisualizer {
private ForestVisualizer() {
}
- public static String toString(DecisionForest forest, Dataset dataset, String[] attrNames)
- throws Exception {
- Method getTrees = forest.getClass().getDeclaredMethod("getTrees");
- getTrees.setAccessible(true);
- @SuppressWarnings("unchecked")
- List<Node> trees = (List<Node>) getTrees.invoke(forest);
-
+ public static String toString(DecisionForest forest, Dataset dataset, String[] attrNames) {
+
+ List<Node> trees;
+ try {
+ Method getTrees = forest.getClass().getDeclaredMethod("getTrees");
+ getTrees.setAccessible(true);
+ trees = (List<Node>) getTrees.invoke(forest);
+ } catch (IllegalAccessException e) {
+ throw new IllegalStateException(e);
+ } catch (InvocationTargetException e) {
+ throw new IllegalStateException(e);
+ } catch (NoSuchMethodException e) {
+ throw new IllegalStateException(e);
+ }
+
int cnt = 1;
StringBuilder buff = new StringBuilder();
for (Node tree : trees) {
- buff.append("Tree[" + cnt + "]:");
+ buff.append("Tree[").append(cnt).append("]:");
buff.append(TreeVisualizer.toString(tree, dataset, attrNames));
buff.append('\n');
cnt++;
@@ -73,8 +84,7 @@ public final class ForestVisualizer {
* @param attrNames
* attribute names
*/
- public static String toString(String forestPath, String datasetPath, String[] attrNames)
- throws Exception {
+ public static String toString(String forestPath, String datasetPath, String[] attrNames) throws IOException {
Configuration conf = new Configuration();
DecisionForest forest = DecisionForest.load(conf, new Path(forestPath));
Dataset dataset = Dataset.load(conf, new Path(datasetPath));
@@ -90,8 +100,7 @@ public final class ForestVisualizer {
* @param attrNames
* attribute names
*/
- public static void print(String forestPath, String datasetPath, String[] attrNames)
- throws Exception {
+ public static void print(String forestPath, String datasetPath, String[] attrNames) throws IOException {
System.out.println(toString(forestPath, datasetPath, attrNames));
}
@@ -132,7 +141,7 @@ public final class ForestVisualizer {
String modelName = cmdLine.getValue(modelOpt).toString();
String[] attrNames = null;
if (cmdLine.hasOption(attrNamesOpt)) {
- List<String> names = (List<String>) cmdLine.getValues(attrNamesOpt);
+ Collection<String> names = (Collection<String>) cmdLine.getValues(attrNamesOpt);
if (!names.isEmpty()) {
attrNames = new String[names.size()];
names.toArray(attrNames);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java?rev=1352052&r1=1352051&r2=1352052&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/df/tools/Frequencies.java Wed Jun 20 12:07:50 2012
@@ -107,17 +107,10 @@ public final class Frequencies extends C
int[][] counts = job.run(getConf());
- // compute the partitions' sizes
- int numPartitions = counts.length;
- // int[] sizes = new int[numPartitions]; // TODO this isn't used?
- // for (int p = 0; p < numPartitions; p++) {
- // sizes[p] = DataUtils.sum(counts[p]);
- // }
-
// outputing the frequencies
log.info("counts[partition][class]");
- for (int p = 0; p < numPartitions; p++) {
- log.info(Arrays.toString(counts[p]));
+ for (int[] count : counts) {
+ log.info(Arrays.toString(count));
}
}