You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by tc...@apache.org on 2012/05/15 03:59:01 UTC
svn commit: r1338501 [1/3] - in /mahout/trunk:
core/src/main/java/org/apache/mahout/cf/taste/impl/model/
core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/...
Author: tcp
Date: Tue May 15 01:58:58 2012
New Revision: 1338501
URL: http://svn.apache.org/viewvc?rev=1338501&view=rev
Log:
MAHOUT-987: first round of style fixes; lots of whitespace fixes
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDASampler.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/DistributedRowMatrixWriter.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/StandardDeviationCalculatorMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluationRunner.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/evaluation/AucTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/ClusteringTestUtils.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/TestMapReduce.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/lda/cvb/TestCVBModelTrainer.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/MatrixWritableTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stats/BasicStatsTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoderTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToRecMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/email/PrepEmailReducer.java
mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/ValidateAdaptiveLogistic.java
mahout/trunk/integration/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchives.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/MatrixDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/CSVClusterWriter.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/VectorHelper.java
mahout/trunk/integration/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/GenericPermuting.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosSolver.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/decomposer/lanczos/LanczosState.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/math/Arithmetic.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Gamma.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleMatrix2D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/AbstractMatrix2D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix1D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/DenseDoubleMatrix2D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/SparseDoubleMatrix2D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/impl/WrapperDoubleMatrix2D.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Property.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/MatrixTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/SolverTest.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/hebbian/TestHebbianSolver.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/decomposer/lanczos/TestLanczosSolver.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/ExponentialTest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java Tue May 15 01:58:58 2012
@@ -185,7 +185,7 @@ public final class GenericItemPreference
}
swapped = false;
int max = length - gap;
- for (int i = 0; i < max; i++){
+ for (int i = 0; i < max; i++) {
int other = i + gap;
if (isLess(other, i, type)) {
swap(i, other);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java Tue May 15 01:58:58 2012
@@ -191,7 +191,7 @@ public final class GenericUserPreference
}
swapped = false;
int max = length - gap;
- for (int i = 0; i < max; i++){
+ for (int i = 0; i < max; i++) {
int other = i + gap;
if (isLess(other, i, type)) {
swap(i, other);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/ConjugateGradientOptimizer.java Tue May 15 01:58:58 2012
@@ -41,7 +41,7 @@ public final class ConjugateGradientOpti
* a = (r'*w)/(w'*z);
* x = x + a*w;
* r = r - a*z;
- * if( norm(r) < 1e-10 )
+ * if ( norm(r) < 1e-10 )
* break;
* end
* B = (r'*z)/(w'*z);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Tue May 15 01:58:58 2012
@@ -145,7 +145,7 @@ public class ConfusionMatrix {
}
}
Map<String,Integer> labels = Maps.newHashMap();
- for(Map.Entry<String, Integer> entry : labelMap.entrySet()) {
+ for (Map.Entry<String, Integer> entry : labelMap.entrySet()) {
labels.put(entry.getKey(), entry.getValue());
}
m.setRowLabelBindings(labels);
@@ -172,7 +172,7 @@ public class ConfusionMatrix {
String[] sorted = sortLabels(labels);
verifyLabels(length, sorted);
labelMap.clear();
- for(int i = 0; i < length; i++) {
+ for (int i = 0; i < length; i++) {
labelMap.put(sorted[i], i);
}
}
@@ -180,7 +180,7 @@ public class ConfusionMatrix {
private static String[] sortLabels(Map<String,Integer> labels) {
String[] sorted = new String[labels.keySet().size()];
- for(String label: labels.keySet()) {
+ for (String label: labels.keySet()) {
Integer index = labels.get(label);
sorted[index] = label;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Tue May 15 01:58:58 2012
@@ -69,7 +69,7 @@ public class ResultAnalyzer {
incorrectlyClassified++;
}
confusionMatrix.addInstance(correctLabel, classifiedResult);
- if (classifiedResult.getLogLikelihood() != Double.MAX_VALUE){
+ if (classifiedResult.getLogLikelihood() != Double.MAX_VALUE) {
summarizer.add(classifiedResult.getLogLikelihood());
hasLL = true;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Tue May 15 01:58:58 2012
@@ -117,7 +117,7 @@ public final class BayesUtils {
try {
for (Object label : labels) {
String theLabel = ((Pair<?,?>) label).getFirst().toString();
- if (!seen.contains(theLabel)){
+ if (!seen.contains(theLabel)) {
writer.append(new Text(theLabel), new IntWritable(i++));
seen.add(theLabel);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/CsvRecordFactory.java Tue May 15 01:58:58 2012
@@ -113,7 +113,7 @@ public class CsvRecordFactory implements
targetDictionary = new Dictionary();
}
- public CsvRecordFactory(String targetName, String idName, Map<String, String> typeMap){
+ public CsvRecordFactory(String targetName, String idName, Map<String, String> typeMap) {
this(targetName, typeMap);
this.idName = idName;
}
@@ -176,7 +176,7 @@ public class CsvRecordFactory implements
target = vars.get(targetName);
// record id column
- if (idName != null){
+ if (idName != null) {
id = vars.get(idName);
}
@@ -317,7 +317,7 @@ public class CsvRecordFactory implements
* @param line the line of content read from CSV file
* @return the id value of the CSV record
*/
- public String getIdString(CharSequence line){
+ public String getIdString(CharSequence line) {
List<String> values = Lists.newArrayList(COMMA.split(line));
return values.get(id);
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyDriver.java Tue May 15 01:58:58 2012
@@ -111,7 +111,7 @@ public class CanopyDriver extends Abstra
.equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD);
DistanceMeasure measure = ClassUtils.instantiateAs(measureClass, DistanceMeasure.class);
double clusterClassificationThreshold = 0.0;
- if(hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)){
+ if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) {
clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD));
}
run(conf, input, output, measure, t1, t2, t3, t4, clusterFilter,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Tue May 15 01:58:58 2012
@@ -269,7 +269,7 @@ public class FuzzyKMeansDriver extends A
List<Cluster> clusters = new ArrayList<Cluster>();
FuzzyKMeansUtil.configureWithClusterInfo(clustersIn, clusters);
- if(conf==null){
+ if (conf==null) {
conf = new Configuration();
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansUtil.java Tue May 15 01:58:58 2012
@@ -44,7 +44,7 @@ final class FuzzyKMeansUtil {
new Configuration())) {
Class<? extends Writable> valueClass = value.getClass();
- if(valueClass.equals(ClusterWritable.class)){
+ if (valueClass.equals(ClusterWritable.class)) {
ClusterWritable clusterWritable = (ClusterWritable)value;
value = clusterWritable.getValue();
valueClass = value.getClass();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansUtil.java Tue May 15 01:58:58 2012
@@ -41,7 +41,7 @@ final class KMeansUtil {
for (Writable value :
new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST, PathFilters.partFilter(), conf)) {
Class<? extends Writable> valueClass = value.getClass();
- if(valueClass.equals(ClusterWritable.class)){
+ if (valueClass.equals(ClusterWritable.class)) {
ClusterWritable clusterWritable = (ClusterWritable)value;
value = clusterWritable.getValue();
valueClass = value.getClass();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDADriver.java Tue May 15 01:58:58 2012
@@ -178,7 +178,7 @@ public final class LDADriver extends Abs
for (FileStatus fstatus : fs.globStatus(new Path(stateDir, "state-*"))) {
try {
int iteration = Integer.parseInt(fstatus.getPath().getName().split("-")[1]);
- if(iteration > maxIteration) {
+ if (iteration > maxIteration) {
maxIteration = iteration;
lastPath = fstatus.getPath();
}
@@ -260,7 +260,7 @@ public final class LDADriver extends Abs
stateIn = stateOut;
oldLL = ll;
}
- if(runSequential) {
+ if (runSequential) {
computeDocumentTopicProbabilitiesSequential(conf, input, new Path(output, "docTopics"));
} else {
computeDocumentTopicProbabilities(conf,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDASampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDASampler.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDASampler.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/LDASampler.java Tue May 15 01:58:58 2012
@@ -37,7 +37,7 @@ public class LDASampler {
public LDASampler(Matrix model, Random random) {
this.random = random;
samplers = new Sampler[model.numRows()];
- for(int i = 0; i < samplers.length; i++) {
+ for (int i = 0; i < samplers.length; i++) {
samplers[i] = new Sampler(random, model.viewRow(i));
}
}
@@ -56,7 +56,7 @@ public class LDASampler {
"topicDistribution must have same cardinality as the sampling model");
int[] samples = new int[numSamples];
Sampler topicSampler = new Sampler(random, topicDistribution);
- for(int i = 0; i < numSamples; i++) {
+ for (int i = 0; i < numSamples; i++) {
samples[i] = samplers[topicSampler.sample()].sample();
}
return samples;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0DocInferenceMapper.java Tue May 15 01:58:58 2012
@@ -35,7 +35,7 @@ public class CVB0DocInferenceMapper exte
Matrix docModel = new SparseRowMatrix(numTopics, doc.get().size());
int maxIters = getMaxIters();
ModelTrainer modelTrainer = getModelTrainer();
- for(int i = 0; i < maxIters; i++) {
+ for (int i = 0; i < maxIters; i++) {
modelTrainer.getReadModel().trainDocTopicModel(doc.get(), docTopics, docModel);
}
context.write(docId, new VectorWritable(docTopics));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CVB0Driver.java Tue May 15 01:58:58 2012
@@ -152,7 +152,7 @@ public class CVB0Driver extends Abstract
addOption(buildOption(BACKFILL_PERPLEXITY, null,
"enable backfilling of missing perplexity values", false, false, null));
- if(parseArguments(args) == null) {
+ if (parseArguments(args) == null) {
return -1;
}
@@ -285,7 +285,7 @@ public class CVB0Driver extends Abstract
}
long startTime = System.currentTimeMillis();
- while(iterationNumber < maxIterations) {
+ while (iterationNumber < maxIterations) {
// test convergence
if (convergenceDelta > 0.0) {
double delta = rateOfChange(perplexities);
@@ -305,7 +305,7 @@ public class CVB0Driver extends Abstract
maxIterations, numReduceTasks);
// calculate perplexity
- if(testFraction > 0 && iterationNumber % iterationBlockSize == 0) {
+ if (testFraction > 0 && iterationNumber % iterationBlockSize == 0) {
perplexities.add(calculatePerplexity(conf, inputPath, modelOutputPath, iterationNumber));
log.info("Current perplexity = {}", perplexities.get(perplexities.size() - 1));
log.info("(p_{} - p_{}) / p_0 = {}; target = {}", new Object[]{
@@ -325,10 +325,10 @@ public class CVB0Driver extends Abstract
Job docInferenceJob = docTopicOutputPath != null
? writeDocTopicInference(conf, inputPath, finalIterationData, docTopicOutputPath)
: null;
- if(topicModelOutputJob != null && !topicModelOutputJob.waitForCompletion(true)) {
+ if (topicModelOutputJob != null && !topicModelOutputJob.waitForCompletion(true)) {
return -1;
}
- if(docInferenceJob != null && !docInferenceJob.waitForCompletion(true)) {
+ if (docInferenceJob != null && !docInferenceJob.waitForCompletion(true)) {
return -1;
}
return 0;
@@ -336,7 +336,7 @@ public class CVB0Driver extends Abstract
private static double rateOfChange(List<Double> perplexities) {
int sz = perplexities.size();
- if(sz < 2) {
+ if (sz < 2) {
return Double.MAX_VALUE;
}
return Math.abs(perplexities.get(sz - 1) - perplexities.get(sz - 2)) / perplexities.get(0);
@@ -362,7 +362,7 @@ public class CVB0Driver extends Abstract
FileOutputFormat.setOutputPath(job, outputPath);
setModelPaths(job, modelPath);
HadoopUtil.delete(conf, outputPath);
- if(!job.waitForCompletion(true)) {
+ if (!job.waitForCompletion(true)) {
throw new InterruptedException("Failed to calculate perplexity for: " + modelPath);
}
return readPerplexity(conf, modelPath.getParent(), iteration);
@@ -458,7 +458,7 @@ public class CVB0Driver extends Abstract
if (modelInput != null && fs.exists(modelInput)) {
FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
URI[] modelUris = new URI[statuses.length];
- for(int i = 0; i < statuses.length; i++) {
+ for (int i = 0; i < statuses.length; i++) {
modelUris[i] = statuses[i].getPath().toUri();
}
DistributedCache.setCacheFiles(modelUris, conf);
@@ -487,7 +487,7 @@ public class CVB0Driver extends Abstract
FileSystem fs = FileSystem.get(modelTempDir.toUri(), config);
int iterationNumber = 1;
Path iterationPath = modelPath(modelTempDir, iterationNumber);
- while(fs.exists(iterationPath) && iterationNumber <= maxIterations) {
+ while (fs.exists(iterationPath) && iterationNumber <= maxIterations) {
log.info("Found previous state: " + iterationPath);
iterationNumber++;
iterationPath = modelPath(modelTempDir, iterationNumber);
@@ -514,7 +514,7 @@ public class CVB0Driver extends Abstract
FileOutputFormat.setOutputPath(job, modelOutput);
setModelPaths(job, modelInput);
HadoopUtil.delete(conf, modelOutput);
- if(!job.waitForCompletion(true)) {
+ if (!job.waitForCompletion(true)) {
throw new InterruptedException(String.format("Failed to complete iteration %d stage 1",
iterationNumber));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0Mapper.java Tue May 15 01:58:58 2012
@@ -90,7 +90,7 @@ public class CachingCVB0Mapper
log.info("Initializing read model");
TopicModel readModel;
Path[] modelPaths = CVB0Driver.getModelPaths(conf);
- if(modelPaths != null && modelPaths.length > 0) {
+ if (modelPaths != null && modelPaths.length > 0) {
readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
} else {
log.info("No model files found");
@@ -123,7 +123,7 @@ public class CachingCVB0Mapper
log.info("Writing model");
TopicModel model = modelTrainer.getReadModel();
- for(MatrixSlice topic : model) {
+ for (MatrixSlice topic : model) {
context.write(new IntWritable(topic.index()), new VectorWritable(topic.vector()));
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/CachingCVB0PerplexityMapper.java Tue May 15 01:58:58 2012
@@ -73,7 +73,7 @@ public class CachingCVB0PerplexityMapper
log.info("Initializing read model");
TopicModel readModel;
Path[] modelPaths = CVB0Driver.getModelPaths(conf);
- if(modelPaths != null && modelPaths.length > 0) {
+ if (modelPaths != null && modelPaths.length > 0) {
readModel = new TopicModel(conf, eta, alpha, null, numUpdateThreads, modelWeight, modelPaths);
} else {
log.info("No model files found");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/InMemoryCollapsedVariationalBayes0.java Tue May 15 01:58:58 2012
@@ -116,8 +116,8 @@ public class InMemoryCollapsedVariationa
this.initialModelCorpusFraction = modelCorpusFraction;
numTerms = terms != null ? terms.length : corpus.numCols();
termIdMap = Maps.newHashMap();
- if(terms != null) {
- for(int t=0; t<terms.length; t++) {
+ if (terms != null) {
+ for (int t=0; t<terms.length; t++) {
termIdMap.put(terms[t], t);
}
}
@@ -130,10 +130,10 @@ public class InMemoryCollapsedVariationa
private void postInitCorpus() {
totalCorpusWeight = 0;
int numNonZero = 0;
- for(int i=0; i<numDocuments; i++) {
+ for (int i=0; i<numDocuments; i++) {
Vector v = corpusWeights.viewRow(i);
double norm;
- if(v != null && (norm = v.norm(1)) != 0) {
+ if (v != null && (norm = v.norm(1)) != 0) {
numNonZero += v.getNumNondefaultElements();
totalCorpusWeight += norm;
}
@@ -158,7 +158,7 @@ public class InMemoryCollapsedVariationa
}
private void inferDocuments(double convergence, int maxIter, boolean recalculate) {
- for(int docId = 0; docId < corpusWeights.numRows() ; docId++) {
+ for (int docId = 0; docId < corpusWeights.numRows() ; docId++) {
Vector inferredDocument = topicModel.infer(corpusWeights.viewRow(docId),
docTopicCounts.viewRow(docId));
// do what now?
@@ -172,8 +172,8 @@ public class InMemoryCollapsedVariationa
public void trainDocuments(double testFraction) {
long start = System.nanoTime();
modelTrainer.start();
- for(int docId = 0; docId < corpusWeights.numRows(); docId++) {
- if(testFraction == 0 || docId % (1/testFraction) != 0) {
+ for (int docId = 0; docId < corpusWeights.numRows(); docId++) {
+ if (testFraction == 0 || docId % (1/testFraction) != 0) {
Vector docTopics = new DenseVector(numTopics).assign(1.0/numTopics); // docTopicCounts.getRow(docId)
modelTrainer.trainSync(corpusWeights.viewRow(docId), docTopics , true, 10);
}
@@ -185,7 +185,7 @@ public class InMemoryCollapsedVariationa
/*
private double error(int docId) {
Vector docTermCounts = corpusWeights.viewRow(docId);
- if(docTermCounts == null) {
+ if (docTermCounts == null) {
return 0;
} else {
Vector expectedDocTermCounts =
@@ -199,7 +199,7 @@ public class InMemoryCollapsedVariationa
private double error() {
long time = System.nanoTime();
double error = 0;
- for(int docId = 0; docId < numDocuments; docId++) {
+ for (int docId = 0; docId < numDocuments; docId++) {
error += error(docId);
}
logTime("error calculation", System.nanoTime() - time);
@@ -216,9 +216,9 @@ public class InMemoryCollapsedVariationa
int maxIterations, int minIter, double testFraction) {
int iter = 0;
double oldPerplexity = 0;
- while(iter < minIter) {
+ while (iter < minIter) {
trainDocuments(testFraction);
- if(verbose) {
+ if (verbose) {
log.info("model after: " + iter + ": " + modelTrainer.getReadModel().toString());
}
log.info("iteration " + iter + " complete");
@@ -229,9 +229,9 @@ public class InMemoryCollapsedVariationa
}
double newPerplexity = 0;
double fractionalChange = Double.MAX_VALUE;
- while(iter < maxIterations && fractionalChange > minFractionalErrorChange) {
+ while (iter < maxIterations && fractionalChange > minFractionalErrorChange) {
trainDocuments();
- if(verbose) {
+ if (verbose) {
log.info("model after: " + iter + ": " + modelTrainer.getReadModel().toString());
}
newPerplexity = modelTrainer.calculatePerplexity(corpusWeights, docTopicCounts,
@@ -242,7 +242,7 @@ public class InMemoryCollapsedVariationa
log.info(fractionalChange + " = fractionalChange");
oldPerplexity = newPerplexity;
}
- if(iter < maxIterations) {
+ if (iter < maxIterations) {
log.info(String.format("Converged! fractional error change: %f, error %f",
fractionalChange, newPerplexity));
} else {
@@ -378,7 +378,7 @@ public class InMemoryCollapsedVariationa
long start = System.nanoTime();
- if(conf.get("fs.default.name") == null) {
+ if (conf.get("fs.default.name") == null) {
String dfsNameNode = (String)cmdLine.getValue(dfsOpt);
conf.set("fs.default.name", dfsNameNode);
}
@@ -400,7 +400,7 @@ public class InMemoryCollapsedVariationa
if ("randstart".equalsIgnoreCase(reInferDocTopics)) {
cvb0.inferDocuments(0.0, 100, true);
- } else if("continue".equalsIgnoreCase(reInferDocTopics)) {
+ } else if ("continue".equalsIgnoreCase(reInferDocTopics)) {
cvb0.inferDocuments(0.0, 100, false);
}
@@ -419,15 +419,15 @@ public class InMemoryCollapsedVariationa
private static Map<Integer, Map<String, Integer>> loadCorpus(String path) throws IOException {
List<String> lines = Resources.readLines(Resources.getResource(path), Charsets.UTF_8);
Map<Integer, Map<String, Integer>> corpus = Maps.newHashMap();
- for(int i=0; i<lines.size(); i++) {
+ for (int i=0; i<lines.size(); i++) {
String line = lines.get(i);
Map<String, Integer> doc = Maps.newHashMap();
- for(String s : line.split(" ")) {
+ for (String s : line.split(" ")) {
s = s.replaceAll("\\W", "").toLowerCase().trim();
- if(s.length() == 0) {
+ if (s.length() == 0) {
continue;
}
- if(!doc.containsKey(s)) {
+ if (!doc.containsKey(s)) {
doc.put(s, 0);
}
doc.put(s, doc.get(s) + 1);
@@ -439,7 +439,7 @@ public class InMemoryCollapsedVariationa
*/
private static String[] loadDictionary(String dictionaryPath, Configuration conf) {
- if(dictionaryPath == null) {
+ if (dictionaryPath == null) {
return null;
}
Path dictionaryFile = new Path(dictionaryPath);
@@ -453,7 +453,7 @@ public class InMemoryCollapsedVariationa
maxTermId = Math.max(maxTermId, record.getSecond().get());
}
String[] terms = new String[maxTermId + 1];
- for(Pair<Integer, String> pair : termList) {
+ for (Pair<Integer, String> pair : termList) {
terms[pair.getFirst()] = pair.getSecond();
}
return terms;
@@ -461,7 +461,7 @@ public class InMemoryCollapsedVariationa
@Override
public Configuration getConf() {
- if(super.getConf() == null) {
+ if (super.getConf() == null) {
setConf(new Configuration());
}
return super.getConf();
@@ -472,16 +472,16 @@ public class InMemoryCollapsedVariationa
Path vectorPath = new Path(vectorPathString);
FileSystem fs = vectorPath.getFileSystem(conf);
List<Path> subPaths = Lists.newArrayList();
- if(fs.isFile(vectorPath)) {
+ if (fs.isFile(vectorPath)) {
subPaths.add(vectorPath);
} else {
- for(FileStatus fileStatus : fs.listStatus(vectorPath, PathFilters.logsCRCFilter())) {
+ for (FileStatus fileStatus : fs.listStatus(vectorPath, PathFilters.logsCRCFilter())) {
subPaths.add(fileStatus.getPath());
}
}
List<Vector> vectorList = Lists.newArrayList();
- for(Path subPath : subPaths) {
- for(Pair<IntWritable, VectorWritable> record
+ for (Path subPath : subPaths) {
+ for (Pair<IntWritable, VectorWritable> record
: new SequenceFileIterable<IntWritable, VectorWritable>(subPath, true, conf)) {
vectorList.add(record.getSecond().get());
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/ModelTrainer.java Tue May 15 01:58:58 2012
@@ -118,13 +118,13 @@ public class ModelTrainer {
Iterator<MatrixSlice> docTopicIterator = docTopicCounts.iterator();
double perplexity = 0;
double matrixNorm = 0;
- while(docIterator.hasNext() && docTopicIterator.hasNext()) {
+ while (docIterator.hasNext() && docTopicIterator.hasNext()) {
MatrixSlice docSlice = docIterator.next();
MatrixSlice topicSlice = docTopicIterator.next();
int docId = docSlice.index();
Vector document = docSlice.vector();
Vector topicDist = topicSlice.vector();
- if(testFraction == 0 || docId % (1/testFraction) == 0) {
+ if (testFraction == 0 || docId % (1/testFraction) == 0) {
trainSync(document, topicDist, false, 10);
perplexity += readModel.perplexity(document, topicDist);
matrixNorm += document.norm(1);
@@ -143,14 +143,14 @@ public class ModelTrainer {
Map<Vector, Vector> batch = Maps.newHashMap();
int numTokensInBatch = 0;
long batchStart = System.nanoTime();
- while(docIterator.hasNext() && docTopicIterator.hasNext()) {
+ while (docIterator.hasNext() && docTopicIterator.hasNext()) {
i++;
Vector document = docIterator.next().vector();
Vector topicDist = docTopicIterator.next().vector();
- if(isReadWrite) {
- if(batch.size() < numTrainThreads) {
+ if (isReadWrite) {
+ if (batch.size() < numTrainThreads) {
batch.put(document, topicDist);
- if(log.isDebugEnabled()) {
+ if (log.isDebugEnabled()) {
numTokensInBatch += document.getNumNondefaultElements();
}
} else {
@@ -164,13 +164,13 @@ public class ModelTrainer {
} else {
long start = System.nanoTime();
train(document, topicDist, true, numDocTopicIters);
- if(log.isDebugEnabled()) {
+ if (log.isDebugEnabled()) {
times[i % times.length] =
(System.nanoTime() - start) /(1.0e6 * document.getNumNondefaultElements());
- if(i % 100 == 0) {
+ if (i % 100 == 0) {
long time = System.nanoTime() - startTime;
log.debug("trained " + i + " documents in " + (time / 1.0e6) + "ms");
- if(i % 500 == 0) {
+ if (i % 500 == 0) {
Arrays.sort(times);
log.debug("training took median " + times[times.length / 2] + "ms per token-instance");
}
@@ -182,17 +182,17 @@ public class ModelTrainer {
}
public void batchTrain(Map<Vector, Vector> batch, boolean update, int numDocTopicsIters) {
- while(true) {
+ while (true) {
try {
List<TrainerRunnable> runnables = Lists.newArrayList();
- for(Map.Entry<Vector, Vector> entry : batch.entrySet()) {
+ for (Map.Entry<Vector, Vector> entry : batch.entrySet()) {
runnables.add(new TrainerRunnable(readModel, null, entry.getKey(),
entry.getValue(), new SparseRowMatrix(numTopics, numTerms, true),
numDocTopicsIters));
}
threadPool.invokeAll(runnables);
- if(update) {
- for(TrainerRunnable runnable : runnables) {
+ if (update) {
+ for (TrainerRunnable runnable : runnables) {
writeModel.update(runnable.docTopicModel);
}
}
@@ -204,7 +204,7 @@ public class ModelTrainer {
}
public void train(Vector document, Vector docTopicCounts, boolean update, int numDocTopicIters) {
- while(true) {
+ while (true) {
try {
workQueue.put(new TrainerRunnable(readModel,
update ? writeModel : null, document, docTopicCounts, new SparseRowMatrix(
@@ -235,7 +235,7 @@ public class ModelTrainer {
log.info("Initiating stopping of training threadpool");
try {
threadPool.shutdown();
- if(!threadPool.awaitTermination(60, TimeUnit.SECONDS)) {
+ if (!threadPool.awaitTermination(60, TimeUnit.SECONDS)) {
log.warn("Threadpool timed out on await termination - jobs still running!");
}
long newTime = System.nanoTime();
@@ -277,11 +277,11 @@ public class ModelTrainer {
@Override
public void run() {
- for(int i = 0; i < numDocTopicIters; i++) {
+ for (int i = 0; i < numDocTopicIters; i++) {
// synchronous read-only call:
readModel.trainDocTopicModel(document, docTopics, docTopicModel);
}
- if(writeModel != null) {
+ if (writeModel != null) {
// parallel call which is read-only on the docTopicModel, and write-only on the writeModel
// this method does not return until all rows of the docTopicModel have been submitted
// to write work queues
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/lda/cvb/TopicModel.java Tue May 15 01:58:58 2012
@@ -135,9 +135,9 @@ public class TopicModel implements Confi
this.alpha = alpha;
this.sampler = new Sampler(RandomUtils.getRandom());
this.numThreads = numThreads;
- if(modelWeight != 1) {
+ if (modelWeight != 1) {
topicSums.assign(Functions.mult(modelWeight));
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
topicTermCounts.viewRow(x).assign(Functions.mult(modelWeight));
}
}
@@ -146,7 +146,7 @@ public class TopicModel implements Confi
private static Vector viewRowSums(Matrix m) {
Vector v = new DenseVector(m.numRows());
- for(MatrixSlice slice : m) {
+ for (MatrixSlice slice : m) {
v.set(slice.index(), slice.vector().norm(1));
}
return v;
@@ -157,7 +157,7 @@ public class TopicModel implements Confi
new ArrayBlockingQueue<Runnable>(numThreads * 10));
threadPool.allowCoreThreadTimeOut(false);
updaters = new Updater[numThreads];
- for(int i = 0; i < numThreads; i++) {
+ for (int i = 0; i < numThreads; i++) {
updaters[i] = new Updater();
threadPool.submit(updaters[i]);
}
@@ -179,14 +179,14 @@ public class TopicModel implements Confi
private static Pair<Matrix,Vector> randomMatrix(int numTopics, int numTerms, Random random) {
Matrix topicTermCounts = new DenseMatrix(numTopics, numTerms);
Vector topicSums = new DenseVector(numTopics);
- if(random != null) {
- for(int x = 0; x < numTopics; x++) {
- for(int term = 0; term < numTerms; term++) {
+ if (random != null) {
+ for (int x = 0; x < numTopics; x++) {
+ for (int term = 0; term < numTerms; term++) {
topicTermCounts.viewRow(x).set(term, random.nextDouble());
}
}
}
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
topicSums.set(x, random == null ? 1.0 : topicTermCounts.viewRow(x).norm(1));
}
return Pair.of(topicTermCounts, topicSums);
@@ -197,23 +197,23 @@ public class TopicModel implements Confi
int numTopics = -1;
int numTerms = -1;
List<Pair<Integer, Vector>> rows = Lists.newArrayList();
- for(Path modelPath : modelPaths) {
- for(Pair<IntWritable, VectorWritable> row :
+ for (Path modelPath : modelPaths) {
+ for (Pair<IntWritable, VectorWritable> row :
new SequenceFileIterable<IntWritable, VectorWritable>(modelPath, true, conf)) {
rows.add(Pair.of(row.getFirst().get(), row.getSecond().get()));
numTopics = Math.max(numTopics, row.getFirst().get());
- if(numTerms < 0) {
+ if (numTerms < 0) {
numTerms = row.getSecond().get().size();
}
}
}
- if(rows.isEmpty()) {
+ if (rows.isEmpty()) {
throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
}
numTopics++;
Matrix model = new DenseMatrix(numTopics, numTerms);
Vector topicSums = new DenseVector(numTopics);
- for(Pair<Integer, Vector> pair : rows) {
+ for (Pair<Integer, Vector> pair : rows) {
model.viewRow(pair.getFirst()).assign(pair.getSecond());
topicSums.set(pair.getFirst(), pair.getSecond().norm(1));
}
@@ -224,7 +224,7 @@ public class TopicModel implements Confi
@Override
public String toString() {
StringBuilder buf = new StringBuilder();
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
String v = dictionary != null
? vectorToSortedString(topicTermCounts.viewRow(x).normalize(1), dictionary)
: topicTermCounts.viewRow(x).asFormatString();
@@ -242,7 +242,7 @@ public class TopicModel implements Confi
}
public void reset() {
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
topicTermCounts.assignRow(x, new SequentialAccessSparseVector(numTerms));
}
topicSums.assign(1.0);
@@ -250,13 +250,13 @@ public class TopicModel implements Confi
}
public void awaitTermination() {
- for(Updater updater : updaters) {
+ for (Updater updater : updaters) {
updater.shutdown();
}
}
public void renormalize() {
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
topicTermCounts.assignRow(x, topicTermCounts.viewRow(x).normalize(1));
topicSums.assign(1.0);
}
@@ -270,16 +270,16 @@ public class TopicModel implements Confi
// now multiply, term-by-term, by the document, to get the weighted distribution of
// term-topic pairs from this document.
Iterator<Vector.Element> it = original.iterateNonZero();
- while(it.hasNext()) {
+ while (it.hasNext()) {
Vector.Element e = it.next();
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
Vector docTopicModelRow = docTopicModel.viewRow(x);
docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get());
}
}
// now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm
topics.assign(0.0);
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
topics.set(x, docTopicModel.viewRow(x).norm(1));
}
// now renormalize so that sum_x(p(x|doc)) = 1
@@ -289,12 +289,12 @@ public class TopicModel implements Confi
public Vector infer(Vector original, Vector docTopics) {
Vector pTerm = original.like();
Iterator<Vector.Element> it = original.iterateNonZero();
- while(it.hasNext()) {
+ while (it.hasNext()) {
Vector.Element e = it.next();
int term = e.index();
// p(a) = sum_x (p(a|x) * p(x|i))
double pA = 0;
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
pA += (topicTermCounts.viewRow(x).get(term) / topicSums.get(x)) * docTopics.get(x);
}
pTerm.set(term, pA);
@@ -303,7 +303,7 @@ public class TopicModel implements Confi
}
public void update(Matrix docTopicCounts) {
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
updaters[x % updaters.length].update(x, docTopicCounts.viewRow(x));
}
}
@@ -314,7 +314,7 @@ public class TopicModel implements Confi
}
public void update(int termId, Vector topicCounts) {
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
Vector v = topicTermCounts.viewRow(x);
v.set(termId, v.get(termId) + topicCounts.get(x));
}
@@ -323,7 +323,7 @@ public class TopicModel implements Confi
public void persist(Path outputDir, boolean overwrite) throws IOException {
FileSystem fs = outputDir.getFileSystem(conf);
- if(overwrite) {
+ if (overwrite) {
fs.delete(outputDir, true); // CHECK second arg
}
DistributedRowMatrixWriter.write(outputDir, conf, topicTermCounts);
@@ -341,7 +341,7 @@ public class TopicModel implements Confi
*/
private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) {
// for each topic x
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
// get p(topic x | document i), or 1.0 if docTopics is null
double topicWeight = docTopics == null ? 1.0 : docTopics.get(x);
// get w(term a | topic x)
@@ -353,7 +353,7 @@ public class TopicModel implements Confi
// for each term a in document i with non-zero weight
Iterator<Vector.Element> it = document.iterateNonZero();
- while(it.hasNext()) {
+ while (it.hasNext()) {
Vector.Element e = it.next();
int termIndex = e.index();
@@ -371,11 +371,11 @@ public class TopicModel implements Confi
double perplexity = 0;
double norm = docTopics.norm(1) + (docTopics.size() * alpha);
Iterator<Vector.Element> it = document.iterateNonZero();
- while(it.hasNext()) {
+ while (it.hasNext()) {
Vector.Element e = it.next();
int term = e.index();
double prob = 0;
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
double d = (docTopics.get(x) + alpha) / norm;
double p = d * (topicTermCounts.viewRow(x).get(term) + eta)
/ (topicSums.get(x) + eta * numTerms);
@@ -389,14 +389,14 @@ public class TopicModel implements Confi
private void normalizeByTopic(Matrix perTopicSparseDistributions) {
Iterator<Vector.Element> it = perTopicSparseDistributions.viewRow(0).iterateNonZero();
// then make sure that each of these is properly normalized by topic: sum_x(p(x|t,d)) = 1
- while(it.hasNext()) {
+ while (it.hasNext()) {
Vector.Element e = it.next();
int a = e.index();
double sum = 0;
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
sum += perTopicSparseDistributions.viewRow(x).get(a);
}
- for(int x = 0; x < numTopics; x++) {
+ for (int x = 0; x < numTopics; x++) {
perTopicSparseDistributions.viewRow(x).set(a,
perTopicSparseDistributions.viewRow(x).get(a) / sum);
}
@@ -407,7 +407,7 @@ public class TopicModel implements Confi
List<Pair<String,Double>> vectorValues =
new ArrayList<Pair<String, Double>>(vector.getNumNondefaultElements());
Iterator<Vector.Element> it = vector.iterateNonZero();
- while(it.hasNext()) {
+ while (it.hasNext()) {
Vector.Element e = it.next();
vectorValues.add(Pair.of(dictionary != null ? dictionary[e.index()] : String.valueOf(e.index()),
e.get()));
@@ -421,7 +421,7 @@ public class TopicModel implements Confi
StringBuilder bldr = new StringBuilder(2048);
bldr.append('{');
int i = 0;
- while(listIt.hasNext() && i < 25) {
+ while (listIt.hasNext() && i < 25) {
i++;
Pair<String,Double> p = listIt.next();
bldr.append(p.getFirst());
@@ -429,7 +429,7 @@ public class TopicModel implements Confi
bldr.append(p.getSecond());
bldr.append(',');
}
- if(bldr.length() > 1) {
+ if (bldr.length() > 1) {
bldr.setCharAt(bldr.length() - 1, '}');
}
return bldr.toString();
@@ -454,7 +454,7 @@ public class TopicModel implements Confi
public void shutdown() {
try {
synchronized (this) {
- while(!shutdownComplete) {
+ while (!shutdownComplete) {
shutdown = true;
wait(10000L); // Arbitrarily, wait 10 seconds rather than forever for this
}
@@ -465,10 +465,10 @@ public class TopicModel implements Confi
}
public boolean update(int topic, Vector v) {
- if(shutdown) { // maybe don't do this?
+ if (shutdown) { // maybe don't do this?
throw new IllegalStateException("In SHUTDOWN state: cannot submit tasks");
}
- while(true) { // keep trying if interrupted
+ while (true) { // keep trying if interrupted
try {
// start async operation by submitting to the queue
queue.put(Pair.of(topic, v));
@@ -481,10 +481,10 @@ public class TopicModel implements Confi
}
@Override public void run() {
- while(!shutdown) {
+ while (!shutdown) {
try {
Pair<Integer, Vector> pair = queue.poll(1, TimeUnit.SECONDS);
- if(pair != null) {
+ if (pair != null) {
updateTopic(pair.getFirst(), pair.getSecond());
}
} catch (InterruptedException e) {
@@ -492,7 +492,7 @@ public class TopicModel implements Confi
}
}
// in shutdown mode, finish remaining tasks!
- for(Pair<Integer, Vector> pair : queue) {
+ for (Pair<Integer, Vector> pair : queue) {
updateTopic(pair.getFirst(), pair.getSecond());
}
synchronized (this) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Tue May 15 01:58:58 2012
@@ -60,7 +60,7 @@ private Integer numReducers;
clusterWritable.setValue(canopy);
context.write(new Text(String.valueOf(reducer)), clusterWritable);
reducer++;
- if (reducer >= numReducers){
+ if (reducer >= numReducers) {
reducer=0;
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Tue May 15 01:58:58 2012
@@ -135,7 +135,7 @@ public class EigencutsDriver extends Abs
DistributedRowMatrix U = performEigenDecomposition(conf, L, state, eigenrank, overshoot, outputCalc);
U.setConf(new Configuration(conf));
List<Double> eigenValues = Lists.newArrayList();
- for(int i=0; i<eigenrank; i++) {
+ for (int i=0; i<eigenrank; i++) {
eigenValues.set(i, state.getSingularValue(i));
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Tue May 15 01:58:58 2012
@@ -138,11 +138,11 @@ public abstract class AbstractJob extend
return new Path(outputPath, path);
}
- protected File getInputFile(){
+ protected File getInputFile() {
return inputFile;
}
- protected File getOutputFile(){
+ protected File getOutputFile() {
return outputFile;
}
@@ -288,9 +288,9 @@ public abstract class AbstractJob extend
* @param name The name of the option
* @return the {@link org.apache.commons.cli2.Option} with the name, else null
*/
- protected Option getCLIOption(String name){
+ protected Option getCLIOption(String name) {
for (Option option : options) {
- if (option.getPreferredName().equals(name)){
+ if (option.getPreferredName().equals(name)) {
return option;
}
}
@@ -370,7 +370,7 @@ public abstract class AbstractJob extend
this.tempPath = new Path(getOption("tempDir"));
- if (!hasOption("quiet")){
+ if (!hasOption("quiet")) {
log.info("Command line arguments: {}", argMap);
}
return argMap;
@@ -388,7 +388,7 @@ public abstract class AbstractJob extend
*/
public String getOption(String optionName) {
List<String> list = argMap.get(keyFor(optionName));
- if (list != null && list.isEmpty() == false){
+ if (list != null && list.isEmpty() == false) {
return list.get(0);
}
return null;
@@ -400,7 +400,7 @@ public abstract class AbstractJob extend
* @param defaultVal The default value.
* @return The requested option, else the default value if it doesn't exist
*/
- public String getOption(String optionName, String defaultVal){
+ public String getOption(String optionName, String defaultVal) {
String res = getOption(optionName);
if (res == null) {
res = defaultVal;
@@ -413,7 +413,7 @@ public abstract class AbstractJob extend
* @param optionName The unadorned (no "--" prefixing it) option name
* @return The values, else null. If the option is present, but has no values, then the result will be an empty list (Collections.emptyList())
*/
- public List<String> getOptions(String optionName){
+ public List<String> getOptions(String optionName) {
return argMap.get(keyFor(optionName));
}
@@ -502,7 +502,7 @@ public abstract class AbstractJob extend
// nulls are ok, for cases where options are simple flags.
List vo = cmdLine.getValues(o);
- if (vo != null && vo.isEmpty() == false){
+ if (vo != null && vo.isEmpty() == false) {
List<String> vals = new ArrayList<String>();
for (Object o1 : vo) {
vals.add(o1.toString());
@@ -521,9 +521,9 @@ public abstract class AbstractJob extend
* @param optName The adorned (including "--") option name
* @return The first value in the match, else null
*/
- public static String getOption(Map<String, List<String>> args, String optName){
+ public static String getOption(Map<String, List<String>> args, String optName) {
List<String> res = args.get(optName);
- if (res != null && res.isEmpty() == false){
+ if (res != null && res.isEmpty() == false) {
return res.get(0);
}
return null;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/StringUtils.java Tue May 15 01:58:58 2012
@@ -56,7 +56,7 @@ public final class StringUtils {
return (T) XSTREAM.fromXML(str);
}
- public static String escapeXML(String input){
+ public static String escapeXML(String input) {
return input.replaceAll("\"|\\&|\\<|\\>|\'", "_");
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/distance/MahalanobisDistanceMeasure.java Tue May 15 01:58:58 2012
@@ -58,7 +58,7 @@ public class MahalanobisDistanceMeasure
/*public MahalanobisDistanceMeasure(Vector meanVector,Matrix inputMatrix, boolean inversionNeeded)
{
this.meanVector=meanVector;
- if(inversionNeeded)
+ if (inversionNeeded)
setCovarianceMatrix(inputMatrix);
else
setInverseCovarianceMatrix(inputMatrix);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java Tue May 15 01:58:58 2012
@@ -88,7 +88,7 @@ public final class SequenceFileDirIterat
private void init(FileStatus[] statuses,
final boolean reuseKeyValueInstances,
- final Configuration conf){
+ final Configuration conf) {
/*
* prevent NPEs. Unfortunately, Hadoop would return null for list if nothing
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/DistributedRowMatrixWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/DistributedRowMatrixWriter.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/DistributedRowMatrixWriter.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/DistributedRowMatrixWriter.java Tue May 15 01:58:58 2012
@@ -35,7 +35,7 @@ public final class DistributedRowMatrixW
IntWritable.class, VectorWritable.class);
IntWritable topic = new IntWritable();
VectorWritable vector = new VectorWritable();
- for(MatrixSlice slice : matrix) {
+ for (MatrixSlice slice : matrix) {
topic.set(slice.index());
vector.set(slice.vector());
writer.append(topic, vector);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixUtils.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixUtils.java Tue May 15 01:58:58 2012
@@ -43,7 +43,7 @@ public final class MatrixUtils {
IntWritable.class, VectorWritable.class);
IntWritable topic = new IntWritable();
VectorWritable vector = new VectorWritable();
- for(MatrixSlice slice : matrix) {
+ for (MatrixSlice slice : matrix) {
topic.set(slice.index());
vector.set(slice.vector());
writer.append(topic, vector);
@@ -56,31 +56,31 @@ public final class MatrixUtils {
int numCols = -1;
boolean sparse = false;
List<Pair<Integer, Vector>> rows = Lists.newArrayList();
- for(Path modelPath : modelPaths) {
- for(Pair<IntWritable, VectorWritable> row :
+ for (Path modelPath : modelPaths) {
+ for (Pair<IntWritable, VectorWritable> row :
new SequenceFileIterable<IntWritable, VectorWritable>(modelPath, true, conf)) {
rows.add(Pair.of(row.getFirst().get(), row.getSecond().get()));
numRows = Math.max(numRows, row.getFirst().get());
sparse = !row.getSecond().get().isDense();
- if(numCols < 0) {
+ if (numCols < 0) {
numCols = row.getSecond().get().size();
}
}
}
- if(rows.isEmpty()) {
+ if (rows.isEmpty()) {
throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
}
numRows++;
Vector[] arrayOfRows = new Vector[numRows];
- for(Pair<Integer, Vector> pair : rows) {
+ for (Pair<Integer, Vector> pair : rows) {
arrayOfRows[pair.getFirst()] = pair.getSecond();
}
Matrix matrix;
- if(sparse) {
+ if (sparse) {
matrix = new SparseRowMatrix(numRows, numCols, arrayOfRows);
} else {
matrix = new DenseMatrix(numRows, numCols);
- for(int i = 0; i < numRows; i++) {
+ for (int i = 0; i < numRows; i++) {
matrix.assignRow(i, arrayOfRows[i]);
}
}
@@ -89,7 +89,7 @@ public final class MatrixUtils {
public static OpenObjectIntHashMap<String> readDictionary(Configuration conf, Path... dictPath) {
OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap<String>();
- for(Path dictionaryFile : dictPath) {
+ for (Path dictionaryFile : dictPath) {
for (Pair<Writable, IntWritable> record
: new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
dictionary.put(record.getFirst().toString(), record.getSecond().get());
@@ -100,12 +100,12 @@ public final class MatrixUtils {
public static String[] invertDictionary(OpenObjectIntHashMap<String> termIdMap) {
int maxTermId = -1;
- for(String term : termIdMap.keys()) {
+ for (String term : termIdMap.keys()) {
maxTermId = Math.max(maxTermId, termIdMap.get(term));
}
maxTermId++;
String[] dictionary = new String[maxTermId];
- for(String term : termIdMap.keys()) {
+ for (String term : termIdMap.keys()) {
dictionary[termIdMap.get(term)] = term;
}
return dictionary;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java Tue May 15 01:58:58 2012
@@ -190,7 +190,7 @@ public class DistributedLanczosSolver ex
matrix.setConf(new Configuration(getConf() != null ? getConf() : new Configuration()));
LanczosState state;
- if(workingDirPath == null) {
+ if (workingDirPath == null) {
state = new LanczosState(matrix, desiredRank, getInitialVector(matrix));
} else {
HdfsBackedLanczosState hState =
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/HdfsBackedLanczosState.java Tue May 15 01:58:58 2012
@@ -57,7 +57,7 @@ public class HdfsBackedLanczosState exte
//Path metadataPath = new Path(dir, METADATA_FILE);
basisPath = new Path(dir, BASIS_PREFIX);
singularVectorPath = new Path(dir, SINGULAR_PREFIX);
- if(corpus instanceof Configurable) {
+ if (corpus instanceof Configurable) {
setConf(((Configurable)corpus).getConf());
}
}
@@ -84,8 +84,8 @@ public class HdfsBackedLanczosState exte
}
private void createDirIfNotExist(Path path) throws IOException {
- if(!fs.exists(path)) {
- if(!fs.mkdirs(path)) {
+ if (!fs.exists(path)) {
+ if (!fs.mkdirs(path)) {
throw new IOException("Unable to create: " + path);
}
}
@@ -102,28 +102,28 @@ public class HdfsBackedLanczosState exte
}
protected void updateHdfsState() throws IOException {
- if(conf == null) {
+ if (conf == null) {
return;
}
int numBasisVectorsOnDisk = 0;
Path nextBasisVectorPath = new Path(basisPath, BASIS_PREFIX + '_' + numBasisVectorsOnDisk);
- while(fs.exists(nextBasisVectorPath)) {
+ while (fs.exists(nextBasisVectorPath)) {
nextBasisVectorPath = new Path(basisPath, BASIS_PREFIX + '_' + ++numBasisVectorsOnDisk);
}
Vector nextVector;
- while(numBasisVectorsOnDisk < iterationNumber &&
+ while (numBasisVectorsOnDisk < iterationNumber &&
(nextVector = getBasisVector(numBasisVectorsOnDisk)) != null) {
persistVector(nextBasisVectorPath, numBasisVectorsOnDisk, nextVector);
nextBasisVectorPath = new Path(basisPath, BASIS_PREFIX + '_' + ++numBasisVectorsOnDisk);
}
- if(scaleFactor <= 0) {
+ if (scaleFactor <= 0) {
scaleFactor = getScaleFactor(); // load from disk if possible
}
diagonalMatrix = getDiagonalMatrix(); // load from disk if possible
Vector norms = new DenseVector(diagonalMatrix.numCols() - 1);
Vector projections = new DenseVector(diagonalMatrix.numCols());
int i = 0;
- while(i < diagonalMatrix.numCols() - 1) {
+ while (i < diagonalMatrix.numCols() - 1) {
norms.set(i, diagonalMatrix.get(i, i + 1));
projections.set(i, diagonalMatrix.get(i, i));
i++;
@@ -132,7 +132,7 @@ public class HdfsBackedLanczosState exte
persistVector(new Path(baseDir, "projections"), 0, projections);
persistVector(new Path(baseDir, "norms"), 0, norms);
persistVector(new Path(baseDir, "scaleFactor"), 0, new DenseVector(new double[] {scaleFactor}));
- for(Map.Entry<Integer, Vector> entry : singularVectors.entrySet()) {
+ for (Map.Entry<Integer, Vector> entry : singularVectors.entrySet()) {
persistVector(new Path(singularVectorPath, SINGULAR_PREFIX + '_' + entry.getKey()),
entry.getKey(), entry.getValue());
}
@@ -142,7 +142,7 @@ public class HdfsBackedLanczosState exte
protected void persistVector(Path p, int key, Vector vector) throws IOException {
SequenceFile.Writer writer = null;
try {
- if(fs.exists(p)) {
+ if (fs.exists(p)) {
log.warn("{} exists, will overwrite", p);
fs.delete(p, true);
}
@@ -155,14 +155,14 @@ public class HdfsBackedLanczosState exte
}
protected Vector fetchVector(Path p, int keyIndex) throws IOException {
- if(!fs.exists(p)) {
+ if (!fs.exists(p)) {
return null;
}
SequenceFile.Reader reader = new SequenceFile.Reader(fs, p, conf);
IntWritable key = new IntWritable();
VectorWritable vw = new VectorWritable();
- while(reader.next(key, vw)) {
- if(key.get() == keyIndex) {
+ while (reader.next(key, vw)) {
+ if (key.get() == keyIndex) {
return vw.get();
}
}
@@ -171,7 +171,7 @@ public class HdfsBackedLanczosState exte
@Override
public Vector getBasisVector(int i) {
- if(!basis.containsKey(i)) {
+ if (!basis.containsKey(i)) {
try {
Vector v = fetchVector(new Path(basisPath, BASIS_PREFIX + '_' + i), i);
basis.put(i, v);
@@ -184,7 +184,7 @@ public class HdfsBackedLanczosState exte
@Override
public Vector getRightSingularVector(int i) {
- if(!singularVectors.containsKey(i)) {
+ if (!singularVectors.containsKey(i)) {
try {
Vector v = fetchVector(new Path(singularVectorPath, BASIS_PREFIX + '_' + i), i);
singularVectors.put(i, v);
@@ -197,10 +197,10 @@ public class HdfsBackedLanczosState exte
@Override
public double getScaleFactor() {
- if(scaleFactor <= 0) {
+ if (scaleFactor <= 0) {
try {
Vector v = fetchVector(new Path(baseDir, "scaleFactor"), 0);
- if(v != null && v.size() > 0) {
+ if (v != null && v.size() > 0) {
scaleFactor = v.get(0);
}
} catch (IOException e) {
@@ -212,16 +212,16 @@ public class HdfsBackedLanczosState exte
@Override
public Matrix getDiagonalMatrix() {
- if(diagonalMatrix == null) {
+ if (diagonalMatrix == null) {
diagonalMatrix = new DenseMatrix(desiredRank, desiredRank);
}
- if(diagonalMatrix.get(0, 1) <= 0) {
+ if (diagonalMatrix.get(0, 1) <= 0) {
try {
Vector norms = fetchVector(new Path(baseDir, "norms"), 0);
Vector projections = fetchVector(new Path(baseDir, "projections"), 0);
- if(norms != null && projections != null) {
+ if (norms != null && projections != null) {
int i=0;
- while(i<projections.size()-1) {
+ while (i<projections.size()-1) {
diagonalMatrix.set(i, i, projections.get(i));
diagonalMatrix.set(i, i+1, norms.get(i));
diagonalMatrix.set(i+1, i, norms.get(i));
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/StandardDeviationCalculatorMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/StandardDeviationCalculatorMapper.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/StandardDeviationCalculatorMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stats/StandardDeviationCalculatorMapper.java Tue May 15 01:58:58 2012
@@ -39,7 +39,7 @@ public class StandardDeviationCalculator
}
//Kind of ugly, but such is life
double df = Double.NaN;
- if (value instanceof LongWritable){
+ if (value instanceof LongWritable) {
df = ((LongWritable)value).get();
} else if (value instanceof DoubleWritable) {
df = ((DoubleWritable)value).get();
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/Sampler.java Tue May 15 01:58:58 2012
@@ -55,7 +55,7 @@ public class Sampler {
}
public int sample() {
- if(sampler == null) {
+ if (sampler == null) {
throw new NullPointerException("Sampler must have been constructed with a distribution, or"
+ " else sample(Vector) should be used to sample");
}
@@ -71,7 +71,7 @@ public class Sampler {
double[] partition = new double[size];
double norm = vectorDistribution.norm(1);
double sum = 0;
- for(int i = 0; i < size; i++) {
+ for (int i = 0; i < size; i++) {
sum += vectorDistribution.get(i) / norm;
partition[i] = sum;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/stats/entropy/Entropy.java Tue May 15 01:58:58 2012
@@ -90,7 +90,7 @@ public final class Entropy extends Abstr
, "key");
Map<String, List<String>> arguments = parseArguments(args);
- if (arguments == null){
+ if (arguments == null) {
return;
}
source = getOption("source");
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFiles.java Tue May 15 01:58:58 2012
@@ -73,15 +73,15 @@ public final class EncodedVectorsFromSeq
boolean namedVectors = hasOption("namedVector");
int cardinality = 5000;
- if (hasOption("cardinality")){
+ if (hasOption("cardinality")) {
cardinality = Integer.parseInt(getOption("cardinality"));
}
String encoderName = "text";
- if (hasOption("encoderFieldName")){
+ if (hasOption("encoderFieldName")) {
encoderName = getOption("encoderFieldName");
}
String encoderClass = LuceneTextValueEncoder.class.getName();
- if (hasOption("encoderClass")){
+ if (hasOption("encoderClass")) {
encoderClass = getOption("encoderClass");
ClassUtils.instantiateAs(encoderClass, FeatureVectorEncoder.class, new Class[]{String.class}, new Object[]{encoderName});//try instantiating it
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/EncodingMapper.java Tue May 15 01:58:58 2012
@@ -64,7 +64,7 @@ public class EncodingMapper extends Mapp
FeatureVectorEncoder.class,
new Class[]{String.class},
new Object[]{encoderName});
- if (encoder instanceof LuceneTextValueEncoder){
+ if (encoder instanceof LuceneTextValueEncoder) {
((LuceneTextValueEncoder) encoder).setAnalyzer(analyzer);
}
}
@@ -77,7 +77,7 @@ public class EncodingMapper extends Mapp
} else {
vector = new RandomAccessSparseVector(cardinality);
}
- if (namedVectors){
+ if (namedVectors) {
vector = new NamedVector(vector, key.toString());
}
encoder.addToVector(value.toString(), vector);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Tue May 15 01:58:58 2012
@@ -316,7 +316,7 @@ public final class SparseVectorsFromSequ
}
HadoopUtil.delete(new Configuration(conf), tfDir);
}
- if (processIdf){
+ if (processIdf) {
TFIDFConverter.processTfIdf(
new Path(outputDir, DictionaryVectorizer.DOCUMENT_VECTOR_OUTPUT_FOLDER),
outputDir, conf, docFrequenciesFeatures, minDf, maxDF, norm, logNormalize,
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/LuceneTextValueEncoder.java Tue May 15 01:58:58 2012
@@ -86,7 +86,7 @@ public class LuceneTextValueEncoder exte
@Override
public int read(char[] cbuf, int off, int len) {
int toRead = Math.min(len, buf.remaining());
- if (toRead > 0){
+ if (toRead > 0) {
buf.get(cbuf, off, toRead);
return toRead;
} else {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluationRunner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluationRunner.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluationRunner.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/eval/LoadEvaluationRunner.java Tue May 15 01:58:58 2012
@@ -49,7 +49,7 @@ public final class LoadEvaluationRunner
System.out.println("Run Items");
ItemSimilarity similarity = new EuclideanDistanceSimilarity(model);
Recommender recommender = new GenericItemBasedRecommender(model, similarity); // Use an item-item recommender
- for (int i = 0; i < LOOPS; i++){
+ for (int i = 0; i < LOOPS; i++) {
LoadStatistics loadStats = LoadEvaluator.runLoad(recommender, howMany);
System.out.println(loadStats);
}
@@ -58,7 +58,7 @@ public final class LoadEvaluationRunner
UserSimilarity userSim = new EuclideanDistanceSimilarity(model);
UserNeighborhood neighborhood = new NearestNUserNeighborhood(10, userSim, model);
recommender = new GenericUserBasedRecommender(model, neighborhood, userSim);
- for (int i = 0; i < LOOPS; i++){
+ for (int i = 0; i < LOOPS; i++) {
LoadStatistics loadStats = LoadEvaluator.runLoad(recommender, howMany);
System.out.println(loadStats);
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/Utils.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/Utils.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/df/data/Utils.java Tue May 15 01:58:58 2012
@@ -178,7 +178,7 @@ public final class Utils {
vector[attr] = Double.NaN;
} else if (attrs[attr].isNumerical()) {
vector[attr] = rng.nextDouble();
- } else if (attrs[attr].isCategorical()){
+ } else if (attrs[attr].isCategorical()) {
vector[attr] = rng.nextInt(CATEGORICAL_RANGE);
} else { // LABEL
if (regression) {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/evaluation/AucTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/evaluation/AucTest.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/evaluation/AucTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/evaluation/AucTest.java Tue May 15 01:58:58 2012
@@ -31,7 +31,7 @@ public class AucTest extends MahoutTestC
Auc auc = new Auc();
Random gen = RandomUtils.getRandom();
auc.setProbabilityScore(false);
- for (int i=0;i<100000;i++) {
+ for (int i=0; i<100000; i++) {
auc.add(0, gen.nextGaussian());
auc.add(1, gen.nextGaussian() + 1);
}
@@ -43,7 +43,7 @@ public class AucTest extends MahoutTestC
Auc auc = new Auc();
Random gen = RandomUtils.getRandom();
auc.setProbabilityScore(false);
- for (int i=0;i<100000;i++) {
+ for (int i=0; i<100000; i++) {
auc.add(0, gen.nextGaussian());
auc.add(1, gen.nextGaussian() + 1);
}
@@ -67,7 +67,7 @@ public class AucTest extends MahoutTestC
Random gen = RandomUtils.getRandom();
Normal n0 = new Normal(-1, 1, gen);
Normal n1 = new Normal(1, 1, gen);
- for (int i=0;i<100000;i++) {
+ for (int i=0; i<100000; i++) {
double score = n0.nextDouble();
double p = n1.pdf(score) / (n0.pdf(score) + n1.pdf(score));
auc.add(0, p);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java?rev=1338501&r1=1338500&r2=1338501&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/classifier/sgd/AdaptiveLogisticRegressionTest.java Tue May 15 01:58:58 2012
@@ -162,7 +162,7 @@ public final class AdaptiveLogisticRegre
lr.setInterval(2000, 10000);
// start with minimum step size
- for (int i = 2000; i < 20000;i+=2000) {
+ for (int i = 2000; i < 20000; i+=2000) {
assertEquals(i + 2000, lr.nextStep(i));
}