You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2013/06/12 22:44:21 UTC
svn commit: r1492416 [2/3] - in /mahout/trunk: ./
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/
core/src/main/java/org/apache/mahout/classifier/
core/src/main/java/org/apache/mahout/clas...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/HashedVector.java Wed Jun 12 20:44:19 2013
@@ -29,7 +29,7 @@ import org.apache.mahout.math.WeightedVe
* http://www.cs.princeton.edu/courses/archive/spring04/cos598B/bib/CharikarEstim.pdf
*/
public class HashedVector extends WeightedVector {
- protected static int INVALID_INDEX = -1;
+ protected static final int INVALID_INDEX = -1;
/**
* Value of the locality sensitive hash. It is 64 bit.
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/ProjectionSearch.java Wed Jun 12 20:44:19 2013
@@ -100,12 +100,12 @@ public class ProjectionSearch extends Up
}
int numVectors = scalarProjections.get(0).size();
for (TreeMultiset<WeightedThing<Vector>> s : scalarProjections) {
- Preconditions.checkArgument(s.size() == numVectors, "Number of vectors in projection sets " +
- "differ");
+ Preconditions.checkArgument(s.size() == numVectors, "Number of vectors in projection sets "
+ + "differ");
double firstWeight = s.firstEntry().getElement().getWeight();
for (WeightedThing<Vector> w : s) {
- Preconditions.checkArgument(firstWeight <= w.getWeight(), "Weights not in non-decreasing " +
- "order");
+ Preconditions.checkArgument(firstWeight <= w.getWeight(), "Weights not in non-decreasing "
+ + "order");
firstWeight = w.getWeight();
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/Searcher.java Wed Jun 12 20:44:19 2013
@@ -42,7 +42,7 @@ public abstract class Searcher implement
this.distanceMeasure = distanceMeasure;
}
- public DistanceMeasure getDistanceMeasure(){
+ public DistanceMeasure getDistanceMeasure() {
return distanceMeasure;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/random/RandomProjector.java Wed Jun 12 20:44:19 2013
@@ -81,7 +81,7 @@ public final class RandomProjector {
public static Matrix generateBasisZeroPlusMinusOne(int projectedVectorSize, int vectorSize) {
Matrix basisMatrix = new DenseMatrix(projectedVectorSize, vectorSize);
Multinomial<Double> choice = new Multinomial<Double>();
- choice.add(0.0, 2/3.0);
+ choice.add(0.0, 2 / 3.0);
choice.add(Math.sqrt(3.0), 1 / 6.0);
choice.add(-Math.sqrt(3.0), 1 / 6.0);
for (int i = 0; i < projectedVectorSize; ++i) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/DictionaryVectorizer.java Wed Jun 12 20:44:19 2013
@@ -66,7 +66,7 @@ import org.slf4j.LoggerFactory;
* This is a dictionary based Vectorizer.
*/
public final class DictionaryVectorizer extends AbstractJob implements Vectorizer {
- private static Logger log = LoggerFactory.getLogger(DictionaryVectorizer.class);
+ private static final Logger log = LoggerFactory.getLogger(DictionaryVectorizer.class);
public static final String DOCUMENT_VECTOR_OUTPUT_FOLDER = "tf-vectors";
public static final String MIN_SUPPORT = "min.support";
@@ -377,14 +377,18 @@ public final class DictionaryVectorizer
addOption("minSupport", "s", "(Optional) Minimum Support. Default Value: 2", "2");
addOption("maxNGramSize", "ng", "(Optional) The maximum size of ngrams to create"
+ " (2 = bigrams, 3 = trigrams, etc) Default Value:1");
- addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float) Default is " + LLRReducer.DEFAULT_MIN_LLR);
- addOption("norm", "n", "The norm to use, expressed as either a float or \"INF\" if you want to use the Infinite norm. "
+ addOption("minLLR", "ml", "(Optional)The minimum Log Likelihood Ratio(Float) Default is "
+ + LLRReducer.DEFAULT_MIN_LLR);
+ addOption("norm", "n", "The norm to use, expressed as either a float or \"INF\" "
+ + "if you want to use the Infinite norm. "
+ "Must be greater or equal to 0. The default is not to normalize");
- addOption("logNormalize", "lnorm", "(Optional) Whether output vectors should be logNormalize. If set true else false", "false");
+ addOption("logNormalize", "lnorm", "(Optional) Whether output vectors should be logNormalize. "
+ + "If set true else false", "false");
addOption(DefaultOptionCreator.numReducersOption().create());
addOption("chunkSize", "chunk", "The chunkSize in MegaBytes. 100-10000 MB", "100");
addOption(DefaultOptionCreator.methodOption().create());
- addOption("namedVector", "nv", "(Optional) Whether output vectors should be NamedVectors. If set true else false", "false");
+ addOption("namedVector", "nv", "(Optional) Whether output vectors should be NamedVectors. "
+ + "If set true else false", "false");
if (parseArguments(args) == null) {
return -1;
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFiles.java Wed Jun 12 20:44:19 2013
@@ -96,8 +96,9 @@ public final class SparseVectorsFromSequ
abuilder.withName("maxDFSigma").withMinimum(1).withMaximum(1).create()).withDescription(
"What portion of the tf (tf-idf) vectors to be used, expressed in times the standard deviation (sigma) "
+ "of the document frequencies of these vectors. Can be used to remove really high frequency terms."
- + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less than 0 "
- + "no vectors will be filtered out. Default is -1.0. Overrides maxDFPercent").withShortName("xs").create();
+ + " Expressed as a double value. Good value to be specified is 3.0. In case the value is less "
+ + "than 0 no vectors will be filtered out. Default is -1.0. Overrides maxDFPercent")
+ .withShortName("xs").create();
Option minLLROpt = obuilder.withLongName("minLLR").withRequired(false).withArgument(
abuilder.withName("minLLR").withMinimum(1).withMaximum(1).create()).withDescription(
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/pruner/WordsPrunerReducer.java Wed Jun 12 20:44:19 2013
@@ -42,7 +42,7 @@ public class WordsPrunerReducer extends
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Iterator<VectorWritable> it = values.iterator();
if (!it.hasNext()) {
return;
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/term/TFPartialVectorReducer.java Wed Jun 12 20:44:19 2013
@@ -39,18 +39,15 @@ import org.apache.mahout.math.VectorWrit
import org.apache.mahout.math.map.OpenObjectIntHashMap;
import org.apache.mahout.vectorizer.DictionaryVectorizer;
import org.apache.mahout.vectorizer.common.PartialVectorMerger;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.util.Arrays;
import java.util.Iterator;
/**
* Converts a document into a sparse vector
*/
public class TFPartialVectorReducer extends Reducer<Text, StringTuple, Text, VectorWritable> {
- private transient static Logger log = LoggerFactory.getLogger(TFPartialVectorReducer.class);
+
private final OpenObjectIntHashMap<String> dictionary = new OpenObjectIntHashMap<String>();
private int dimension;
@@ -63,7 +60,7 @@ public class TFPartialVectorReducer exte
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
Iterator<StringTuple> it = values.iterator();
if (!it.hasNext()) {
return;
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java Wed Jun 12 20:44:19 2013
@@ -26,6 +26,7 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
@@ -148,7 +149,7 @@ public class RecommenderJobTest extends
EasyMock.replay(context, userCounters);
- Collection<VarLongWritable> varLongWritables = new LinkedList<VarLongWritable>();
+ Collection<VarLongWritable> varLongWritables = Lists.newLinkedList();
varLongWritables.add(new EntityPrefWritable(34L, 1.0f));
varLongWritables.add(new EntityPrefWritable(56L, 2.0f));
@@ -653,7 +654,7 @@ public class RecommenderJobTest extends
public boolean matches(Object argument) {
if (argument instanceof RecommendedItemsWritable) {
RecommendedItemsWritable recommendedItemsWritable = (RecommendedItemsWritable) argument;
- List<RecommendedItem> expectedItems = new LinkedList<RecommendedItem>(Arrays.asList(items));
+ List<RecommendedItem> expectedItems = Arrays.asList(items);
return expectedItems.equals(recommendedItemsWritable.getRecommendedItems());
}
return false;
@@ -913,7 +914,7 @@ public class RecommenderJobTest extends
String[] tokens = keyValue[1].replaceAll("\\[", "")
.replaceAll("\\]", "").split(",");
- List<RecommendedItem> items = new LinkedList<RecommendedItem>();
+ List<RecommendedItem> items = Lists.newLinkedList();
for (String token : tokens) {
String[] itemTokens = token.split(":");
long itemID = Long.parseLong(itemTokens[0]);
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/MathHelper.java Wed Jun 12 20:44:19 2013
@@ -44,7 +44,7 @@ import org.easymock.IArgumentMatcher;
import org.junit.Assert;
/**
- * a collection of small helper methods useful for unit-testing mathematical operations
+ * a collection of small helper methods useful for unit-testing mathematical OPERATIONS
*/
public final class MathHelper {
@@ -161,9 +161,7 @@ public final class MathHelper {
for (Pair<IntWritable,VectorWritable> record :
new SequenceFileIterable<IntWritable,VectorWritable>(path, true, conf)) {
IntWritable key = record.getFirst();
- VectorWritable value = record.getSecond();
readOneRow = true;
- int row = key.get();
rows.put(key.get(), record.getSecond().get());
}
if (!readOneRow) {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java Wed Jun 12 20:44:19 2013
@@ -24,6 +24,7 @@ import java.util.Deque;
import java.util.LinkedList;
import java.util.Random;
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -63,7 +64,7 @@ public class LocalSSVDPCADenseTest exten
// conf.set("mapred.job.tracker","localhost:11011");
// conf.set("fs.default.name","hdfs://localhost:11010/");
- Deque<Closeable> closeables = new LinkedList<Closeable>();
+ Deque<Closeable> closeables = Lists.newLinkedList();
Random rnd = RandomUtils.getRandom();
File tmpDir = getTestTempDir("svdtmp");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Wed Jun 12 20:44:19 2013
@@ -24,6 +24,7 @@ import java.util.Deque;
import java.util.LinkedList;
import java.util.Random;
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -78,7 +79,7 @@ public class LocalSSVDSolverSparseSequen
// conf.set("mapred.job.tracker","localhost:11011");
// conf.set("fs.default.name","hdfs://localhost:11010/");
- Deque<Closeable> closeables = new LinkedList<Closeable>();
+ Deque<Closeable> closeables = Lists.newLinkedList();;
Random rnd = RandomUtils.getRandom();
File tmpDir = getTestTempDir("svdtmp");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/EncodedVectorsFromSequenceFilesTest.java Wed Jun 12 20:44:19 2013
@@ -20,6 +20,7 @@ package org.apache.mahout.vectorizer;
import java.util.LinkedList;
import java.util.List;
+import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -92,7 +93,7 @@ public class EncodedVectorsFromSequenceF
Path tmpPath = getTestTempDirPath();
Path outputPath = new Path(tmpPath, "output");
- List<String> argList = new LinkedList<String>();
+ List<String> argList = Lists.newLinkedList();;
argList.add("-i");
argList.add(inputPath.toString());
argList.add("-o");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/HighDFWordsPrunerTest.java Wed Jun 12 20:44:19 2013
@@ -16,6 +16,7 @@ package org.apache.mahout.vectorizer;
* limitations under the License.
*/
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -88,7 +89,7 @@ public class HighDFWordsPrunerTest exten
private void runTest(boolean prune) throws Exception {
Path outputPath = getTestTempFilePath("output");
- List<String> argList = new LinkedList<String>();
+ List<String> argList = Lists.newLinkedList();
argList.add("-i");
argList.add(inputPath.toString());
argList.add("-o");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/SparseVectorsFromSequenceFilesTest.java Wed Jun 12 20:44:19 2013
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
+import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@@ -164,7 +165,7 @@ public class SparseVectorsFromSequenceFi
private Path runTest(boolean tfWeighting, boolean sequential, boolean named, double maxDFSigma, int numDocs) throws Exception {
Path outputPath = getTestTempFilePath("output");
- List<String> argList = new LinkedList<String>();
+ List<String> argList = Lists.newLinkedList();
argList.add("-i");
argList.add(inputPath.toString());
argList.add("-o");
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/CollocReducerTest.java Wed Jun 12 20:44:19 2013
@@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
+import com.google.common.collect.Lists;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.mahout.common.MahoutTestCase;
@@ -75,7 +76,7 @@ public final class CollocReducerTest ext
for (Gram[] ii : input) {
key.set(ii[0], empty);
- Collection<Gram> vv = new LinkedList<Gram>();
+ Collection<Gram> vv = Lists.newLinkedList();
vv.addAll(Arrays.asList(ii));
c.reduce(key, vv, context);
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/collocations/llr/LLRReducerTest.java Wed Jun 12 20:44:19 2013
@@ -21,6 +21,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
@@ -106,7 +107,7 @@ public final class LLRReducerTest extend
reducer.setup(context);
for (Gram[] ii: input) {
- Collection<Gram> vv = new LinkedList<Gram>();
+ Collection<Gram> vv = Lists.newLinkedList();
vv.addAll(Arrays.asList(ii).subList(1, ii.length));
reducer.reduce(ii[0], vv, context);
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/email/MailToPrefsDriver.java Wed Jun 12 20:44:19 2013
@@ -99,10 +99,6 @@ public final class MailToPrefsDriver ext
int chunkSize = Integer.parseInt(getOption("chunkSize"));
String separator = getOption("separator");
Configuration conf = getConf();
- if (conf == null) {
- setConf(new Configuration());
- conf = getConf();
- }
boolean useCounts = hasOption("useCounts");
AtomicInteger currentPhase = new AtomicInteger();
int[] msgDim = new int[1];
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/DataFileIterator.java Wed Jun 12 20:44:19 2013
@@ -29,6 +29,8 @@ import org.apache.mahout.cf.taste.impl.m
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.common.iterator.FileLineIterator;
import org.apache.mahout.common.Pair;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* <p>An {@link java.util.Iterator} which iterates over any of the KDD Cup's rating files. These include the files
@@ -49,6 +51,8 @@ public final class DataFileIterator
private final FileLineIterator lineIterator;
+ private static final Logger log = LoggerFactory.getLogger(DataFileIterator.class);
+
public DataFileIterator(File dataFile) throws IOException {
if (dataFile == null || dataFile.isDirectory() || !dataFile.exists()) {
throw new IllegalArgumentException("Bad data file: " + dataFile);
@@ -132,7 +136,7 @@ public final class DataFileIterator
try {
Closeables.close(lineIterator, true);
} catch (IOException e) {
- //nothing
+ log.error(e.getMessage(), e);
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sequencelearning/hmm/PosTagger.java Wed Jun 12 20:44:19 2013
@@ -26,6 +26,7 @@ import java.util.Map;
import java.util.regex.Pattern;
import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.CharStreams;
import com.google.common.io.Resources;
@@ -114,13 +115,13 @@ public final class PosTagger {
*/
private static void readFromURL(String url, boolean assignIDs) throws IOException {
// initialize the data structure
- hiddenSequences = new LinkedList<int[]>();
- observedSequences = new LinkedList<int[]>();
+ hiddenSequences = Lists.newLinkedList();
+ observedSequences = Lists.newLinkedList();
readLines = 0;
// now read line by line of the input file
- List<Integer> observedSequence = new LinkedList<Integer>();
- List<Integer> hiddenSequence = new LinkedList<Integer>();
+ List<Integer> observedSequence = Lists.newLinkedList();
+ List<Integer> hiddenSequence = Lists.newLinkedList();
for (String line : CharStreams.readLines(Resources.newReaderSupplier(new URL(url), Charsets.UTF_8))) {
if (line.isEmpty()) {
@@ -155,8 +156,17 @@ public final class PosTagger {
Integer wordID = wordIDs.get(tags[0]);
Integer tagID = tagIDs.get(tags[1]);
// now construct the current sequence
- observedSequence.add(wordID == null ? 0 : wordID);
- hiddenSequence.add(tagID == null ? 0 : tagID);
+ if (wordID == null) {
+ observedSequence.add(0);
+ } else {
+ observedSequence.add(wordID);
+ }
+
+ if (tagID == null) {
+ hiddenSequence.add(0);
+ } else {
+ hiddenSequence.add(tagID);
+ }
}
// if there is still something in the pipe, register it
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/classifier/sgd/SimpleCsvExamples.java Wed Jun 12 20:44:19 2013
@@ -30,6 +30,8 @@ import org.apache.mahout.math.list.IntAr
import org.apache.mahout.math.stats.OnlineSummarizer;
import org.apache.mahout.vectorizer.encoders.ConstantValueEncoder;
import org.apache.mahout.vectorizer.encoders.FeatureVectorEncoder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.Closeable;
@@ -64,8 +66,9 @@ public final class SimpleCsvExamples {
public static final char SEPARATOR_CHAR = '\t';
private static final int FIELDS = 100;
- private SimpleCsvExamples() {
- }
+ private static final Logger log = LoggerFactory.getLogger(SimpleCsvExamples.class);
+
+ private SimpleCsvExamples() {}
public static void main(String[] args) throws IOException {
FeatureVectorEncoder[] encoder = new FeatureVectorEncoder[FIELDS];
@@ -280,7 +283,7 @@ public final class SimpleCsvExamples {
try {
Closeables.close(in, true);
} catch (IOException e) {
- //nothing
+ log.error(e.getMessage(), e);
}
}
}
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/display/DisplayKMeans.java Wed Jun 12 20:44:19 2013
@@ -89,8 +89,8 @@ public class DisplayKMeans extends Displ
}
private static void runSequentialKMeansClusterer(Configuration conf, Path samples, Path output,
- DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta) throws IOException, InterruptedException,
- ClassNotFoundException {
+ DistanceMeasure measure, int numClusters, int maxIterations, double convergenceDelta)
+ throws IOException, InterruptedException, ClassNotFoundException {
Path clustersIn = new Path(output, "random-seeds");
RandomSeedGenerator.buildRandom(conf, samples, clustersIn, numClusters, measure);
KMeansDriver.run(samples, clustersIn, output, measure, convergenceDelta, maxIterations, true, 0.0, true);
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ClusterQualitySummarizer.java Wed Jun 12 20:44:19 2013
@@ -141,8 +141,7 @@ public class ClusterQualitySummarizer {
ClusteringUtils.summarizeClusterDistances(datapoints, centroids, distanceMeasure);
List<OnlineSummarizer> compareSummaries = null;
if (centroidsCompare != null) {
- compareSummaries =
- ClusteringUtils.summarizeClusterDistances(datapoints, centroidsCompare, distanceMeasure);
+ compareSummaries = ClusteringUtils.summarizeClusterDistances(datapoints, centroidsCompare, distanceMeasure);
}
System.out.printf("[Dunn Index] First: %f", ClusteringUtils.dunnIndex(centroids, distanceMeasure, summaries));
if (compareSummaries != null) {
@@ -198,8 +197,8 @@ public class ClusterQualitySummarizer {
.withShortName("cc")
.withRequired(false)
.withArgument(argumentBuilder.withName("centroidsCompare").withMaximum(1).create())
- .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or " +
- "StreamingKMeansDriver)")
+ .withDescription("where to get seq files with the second set of centroids (from Mahout KMeans or "
+ + "StreamingKMeansDriver)")
.create();
Option outputFileOption = builder.withLongName("output")
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/IOUtils.java Wed Jun 12 20:44:19 2013
@@ -10,6 +10,9 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
public class IOUtils {
+
+ private IOUtils() {}
+
/**
* Converts CentroidWritable values in a sequence file into Centroids lazily.
* @param dirIterable the source iterable (comes from a SequenceFileDirIterable).
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/streaming/tools/ResplitSequenceFiles.java Wed Jun 12 20:44:19 2013
@@ -25,6 +25,7 @@ import org.apache.mahout.common.iterator
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
public class ResplitSequenceFiles {
+
private String inputFile;
private String outputFileBase;
private int numSplits;
@@ -32,6 +33,8 @@ public class ResplitSequenceFiles {
private Configuration conf;
private FileSystem fs;
+ private ResplitSequenceFiles() {}
+
private void writeSplit(Iterator<Pair<Writable, Writable>> inputIterator,
int numSplit, int numEntriesPerSplit) throws IOException {
SequenceFile.Writer splitWriter = null;
@@ -85,8 +88,8 @@ public class ResplitSequenceFiles {
.withShortName("o")
.withRequired(true)
.withArgument(argumentBuilder.withName("output").withMaximum(1).create())
- .withDescription("the base name of the file split that the files will be split it; the i'th split has the " +
- "suffix -i")
+ .withDescription("the base name of the file split that the files will be split it; the i'th split has the "
+ + "suffix -i")
.create();
Option numSplitsOption = builder.withLongName("numSplits")
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Wed Jun 12 20:44:19 2013
@@ -319,7 +319,8 @@ public class VectorBenchmarks {
.withLongName("numClusters")
.withRequired(false)
.withArgument(abuilder.withName("nc").withDefault(0).create())
- .withDescription("Number of clusters to create. Set to non zero to run cluster benchmark. Default: 0").withShortName("nc").create();
+ .withDescription("Number of clusters to create. Set to non zero to run cluster benchmark. Default: 0")
+ .withShortName("nc").create();
Option numOpsOpt = obuilder
.withLongName("numOps")
.withRequired(false)
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/model/hbase/HBaseDataModel.java Wed Jun 12 20:44:19 2013
@@ -458,7 +458,7 @@ public final class HBaseDataModel implem
Scan scan = new Scan(new byte[]{0x69}, new byte[]{0x70});
scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
- Collection<Long> ids = new LinkedList<Long>();
+ Collection<Long> ids = Lists.newLinkedList();
for (Result result : scanner) {
ids.add(bytesToUserOrItemID(result.getRow()));
}
@@ -483,7 +483,7 @@ public final class HBaseDataModel implem
Scan scan = new Scan(new byte[]{0x75}, new byte[]{0x76});
scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, new KeyOnlyFilter(), new FirstKeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
- Collection<Long> ids = new LinkedList<Long>();
+ Collection<Long> ids = Lists.newLinkedList();
for (Result result : scanner) {
ids.add(bytesToUserOrItemID(result.getRow()));
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/jdbc/MySQLJDBCDiffStorage.java Wed Jun 12 20:44:19 2013
@@ -25,7 +25,7 @@ import org.apache.mahout.cf.taste.impl.m
* MySQL-specific implementation. Should be used in conjunction with a
* {@link org.apache.mahout.cf.taste.impl.model.jdbc.MySQLJDBCDataModel}. This
* implementation stores item-item diffs in a MySQL database and encapsulates some other slope-one-specific
- * operations that are needed on the preference data in the database. It assumes the database has a schema
+ * OPERATIONS that are needed on the preference data in the database. It assumes the database has a schema
* like:
* </p>
*
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java Wed Jun 12 20:44:19 2013
@@ -26,10 +26,9 @@ import java.util.regex.Pattern;
* A wrapper class to convert an IndexFileNameFilter which implements
* java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
*/
-class LuceneIndexFileNameFilter implements PathFilter {
+final class LuceneIndexFileNameFilter implements PathFilter {
- private static final LuceneIndexFileNameFilter singleton =
- new LuceneIndexFileNameFilter();
+ private static final LuceneIndexFileNameFilter LUCENE_INDEX_FILE_NAME_FILTER = new LuceneIndexFileNameFilter();
/**
* Get a static instance.
@@ -37,18 +36,15 @@ class LuceneIndexFileNameFilter implemen
* @return the static instance
*/
public static LuceneIndexFileNameFilter getFilter() {
- return singleton;
+ return LUCENE_INDEX_FILE_NAME_FILTER;
}
- private LuceneIndexFileNameFilter() {
- }
+ private LuceneIndexFileNameFilter() {}
- //TODO: Lucene defines this in IndexFileNames, but it is package private, so make sure it doesn't change w/ new releases.
+ //TODO: Lucene defines this in IndexFileNames, but it is package private,
+ // so make sure it doesn't change w/ new releases.
private static final Pattern CODEC_FILE_PATTERN = Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
- /* (non-Javadoc)
- * @see org.apache.hadoop.fs.PathFilter#accept(org.apache.hadoop.fs.Path)
- */
public boolean accept(Path path) {
String name = path.getName();
if (CODEC_FILE_PATTERN.matcher(name).matches() || name.startsWith(IndexFileNames.SEGMENTS)) {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java Wed Jun 12 20:44:19 2013
@@ -16,6 +16,7 @@ package org.apache.mahout.text;
* limitations under the License.
*/
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -48,18 +49,21 @@ public class LuceneSegmentInputFormat ex
LuceneStorageConfiguration lucene2SeqConfiguration = new LuceneStorageConfiguration(configuration);
- List<LuceneSegmentInputSplit> inputSplits = new ArrayList<LuceneSegmentInputSplit>();
+ List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
for (Path indexPath : indexPaths) {
- ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath, false, configuration);
+ ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+ false, configuration);
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(directory);
for (SegmentInfoPerCommit segmentInfo : segmentInfos) {
- LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes());
+ LuceneSegmentInputSplit inputSplit = new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name,
+ segmentInfo.sizeInBytes());
inputSplits.add(inputSplit);
- LOG.info("Created {} byte input split for index '{}' segment {}", segmentInfo.sizeInBytes(), indexPath.toUri(), segmentInfo.info.name);
+ LOG.info("Created {} byte input split for index '{}' segment {}", segmentInfo.sizeInBytes(), indexPath.toUri(),
+ segmentInfo.info.name);
}
}
@@ -67,7 +71,8 @@ public class LuceneSegmentInputFormat ex
}
@Override
- public RecordReader<Text, NullWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
+ public RecordReader<Text, NullWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
+ throws IOException, InterruptedException {
LuceneSegmentRecordReader luceneSegmentRecordReader = new LuceneSegmentRecordReader();
luceneSegmentRecordReader.initialize(inputSplit, context);
return luceneSegmentRecordReader;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java Wed Jun 12 20:44:19 2013
@@ -72,7 +72,8 @@ public class LuceneSegmentInputSplit ext
* @throws IOException if an error occurs when accessing the directory
*/
public SegmentInfoPerCommit getSegment(Configuration configuration) throws IOException {
- ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath, false, configuration);
+ ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+ false, configuration);
SegmentInfos segmentInfos = new SegmentInfos();
segmentInfos.read(directory);
@@ -83,6 +84,7 @@ public class LuceneSegmentInputSplit ext
}
}
- throw new IllegalArgumentException("No such segment: '" + segmentInfoName + "' in directory " + directory.toString());
+ throw new IllegalArgumentException("No such segment: '" + segmentInfoName
+ + "' in directory " + directory.toString());
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java Wed Jun 12 20:44:19 2013
@@ -60,7 +60,8 @@ public class LuceneSegmentRecordReader e
Weight weight = lucene2SeqConfiguration.getQuery().createWeight(searcher);
scorer = weight.scorer(segmentReader.getContext(), false, false, null);
if (scorer == null) {
- throw new IllegalArgumentException("Could not create query scorer for query: " + lucene2SeqConfiguration.getQuery());
+ throw new IllegalArgumentException("Could not create query scorer for query: "
+ + lucene2SeqConfiguration.getQuery());
}
}
@@ -84,7 +85,8 @@ public class LuceneSegmentRecordReader e
@Override
public float getProgress() throws IOException, InterruptedException {
- return scorer.cost() == 0 ? 0 : (float) nextDocId / scorer.cost();//this is a rough estimate, due to the possible inaccuracies of cost
+ //this is a rough estimate, due to the possible inaccuracies of cost
+ return scorer.cost() == 0 ? 0 : (float) nextDocId / scorer.cost();
}
@Override
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java Wed Jun 12 20:44:19 2013
@@ -28,9 +28,12 @@ import static org.apache.commons.lang.St
*
**/
class LuceneSeqFileHelper {
+
public static final String SEPARATOR_FIELDS = " ";
public static final int USE_TERM_INFOS = 1;
+ private LuceneSeqFileHelper() {}
+
public static void populateValues(Document document, Text theValue, List<String> fields) {
StringBuilder valueBuilder = new StringBuilder();
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java Wed Jun 12 20:44:19 2013
@@ -17,6 +17,7 @@ package org.apache.mahout.text;
*/
import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
@@ -77,7 +78,8 @@ public class LuceneStorageConfiguration
* @param idField field used for the key of the sequence file
* @param fields field(s) used for the value of the sequence file
*/
- public LuceneStorageConfiguration(Configuration configuration, List<Path> indexPaths, Path sequenceFilesOutputPath, String idField, List<String> fields) {
+ public LuceneStorageConfiguration(Configuration configuration, List<Path> indexPaths, Path sequenceFilesOutputPath,
+ String idField, List<String> fields) {
Preconditions.checkArgument(configuration != null, "Parameter 'configuration' cannot be null");
Preconditions.checkArgument(indexPaths != null, "Parameter 'indexPaths' cannot be null");
Preconditions.checkArgument(indexPaths != null && !indexPaths.isEmpty(), "Parameter 'indexPaths' cannot be empty");
@@ -144,7 +146,8 @@ public class LuceneStorageConfiguration
* @return iterator
*/
public Iterator<Pair<Text, Text>> getSequenceFileIterator() {
- return new SequenceFileDirIterable<Text, Text>(sequenceFilesOutputPath, PathType.LIST, PathFilters.logsCRCFilter(), configuration).iterator();
+ return new SequenceFileDirIterable<Text, Text>(sequenceFilesOutputPath, PathType.LIST, PathFilters.logsCRCFilter(),
+ configuration).iterator();
}
public Configuration getConfiguration() {
@@ -202,16 +205,16 @@ public class LuceneStorageConfiguration
@Override
public void readFields(DataInput in) throws IOException {
try {
- this.sequenceFilesOutputPath = new Path(in.readUTF());
- this.indexPaths = new ArrayList<Path>();
+ sequenceFilesOutputPath = new Path(in.readUTF());
+ indexPaths = Lists.newArrayList();
String[] indexPaths = in.readUTF().split(SEPARATOR_PATHS);
for (String indexPath : indexPaths) {
this.indexPaths.add(new Path(indexPath));
}
- this.idField = in.readUTF();
- this.fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
- this.query = new QueryParser(LUCENE_43, "query", new StandardAnalyzer(LUCENE_43)).parse(in.readUTF());
- this.maxHits = in.readInt();
+ idField = in.readUTF();
+ fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
+ query = new QueryParser(LUCENE_43, "query", new StandardAnalyzer(LUCENE_43)).parse(in.readUTF());
+ maxHits = in.readInt();
} catch (ParseException e) {
throw new RuntimeException("Could not deserialize " + this.getClass().getName(), e);
}
@@ -219,18 +222,35 @@ public class LuceneStorageConfiguration
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
LuceneStorageConfiguration that = (LuceneStorageConfiguration) o;
- if (maxHits != that.maxHits) return false;
- if (fields != null ? !fields.equals(that.fields) : that.fields != null) return false;
- if (idField != null ? !idField.equals(that.idField) : that.idField != null) return false;
- if (indexPaths != null ? !indexPaths.equals(that.indexPaths) : that.indexPaths != null) return false;
- if (query != null ? !query.equals(that.query) : that.query != null) return false;
- if (sequenceFilesOutputPath != null ? !sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath) : that.sequenceFilesOutputPath != null)
+ if (maxHits != that.maxHits) {
+ return false;
+ }
+ if (fields != null ? !fields.equals(that.fields) : that.fields != null) {
+ return false;
+ }
+ if (idField != null ? !idField.equals(that.idField) : that.idField != null) {
return false;
+ }
+ if (indexPaths != null ? !indexPaths.equals(that.indexPaths) : that.indexPaths != null) {
+ return false;
+ }
+ if (query != null ? !query.equals(that.query) : that.query != null) {
+ return false;
+ }
+ if (sequenceFilesOutputPath != null
+ ? !sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath)
+ : that.sequenceFilesOutputPath != null) {
+ return false;
+ }
return true;
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java Wed Jun 12 20:44:19 2013
@@ -31,6 +31,8 @@ import org.apache.lucene.store.IOContext
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collection;
@@ -49,15 +51,18 @@ public class ReadOnlyFileSystemDirectory
private final Path directory;
private final int ioFileBufferSize;
- /**
- * Constructor
- *
- * @param fs
- * @param directory
- * @param create
- * @param conf
- * @throws IOException
- */
+ private static final Logger log = LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class);
+
+ /**
+ * Constructor
+ *
+ * @param fs
+ * @param directory
+ * @param create
+ * @param conf
+ * @throws IOException
+ */
+
public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean create,
Configuration conf) throws IOException {
@@ -76,7 +81,7 @@ public class ReadOnlyFileSystemDirectory
isDir = status.isDir();
}
} catch (IOException e) {
- // file does not exist, isDir already set to false
+ log.error(e.getMessage(), e);
}
if (!isDir) {
throw new IOException(directory + " is not a directory");
@@ -96,7 +101,7 @@ public class ReadOnlyFileSystemDirectory
isDir = status.isDir();
}
} catch (IOException e) {
- // file does not exist, isDir already set to false
+ log.error(e.getMessage(), e);
}
if (!isDir) {
throw new IOException(directory + " is not a directory");
@@ -113,9 +118,6 @@ public class ReadOnlyFileSystemDirectory
}
}
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#list()
- */
public String[] list() throws IOException {
FileStatus[] fileStatus =
fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
@@ -131,50 +133,23 @@ public class ReadOnlyFileSystemDirectory
return list();
}
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#fileExists(java.lang.String)
- */
+ @Override
public boolean fileExists(String name) throws IOException {
return fs.exists(new Path(directory, name));
}
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#fileModified(java.lang.String)
- */
- public long fileModified(String name) {
- throw new UnsupportedOperationException();
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#touchFile(java.lang.String)
- */
- public void touchFile(String name) {
- throw new UnsupportedOperationException();
- }
-
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#fileLength(java.lang.String)
- */
+ @Override
public long fileLength(String name) throws IOException {
return fs.getFileStatus(new Path(directory, name)).getLen();
}
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#deleteFile(java.lang.String)
- */
+ @Override
public void deleteFile(String name) throws IOException {
if (!fs.delete(new Path(directory, name), true)) {
throw new IOException("Cannot delete index file " + name);
}
}
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#renameFile(java.lang.String, java.lang.String)
- */
- public void renameFile(String from, String to) throws IOException {
- fs.rename(new Path(directory, from), new Path(directory, to));
- }
-
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
//TODO: What should we be doing with the IOContext here, if anything?
@@ -197,10 +172,7 @@ public class ReadOnlyFileSystemDirectory
return new FileSystemIndexInput(new Path(directory, name), ioFileBufferSize);
}
-
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#makeLock(java.lang.String)
- */
+ @Override
public Lock makeLock(final String name) {
return new Lock() {
public boolean obtain() {
@@ -220,21 +192,17 @@ public class ReadOnlyFileSystemDirectory
};
}
- /* (non-Javadoc)
- * @see org.apache.lucene.store.Directory#close()
- */
+ @Override
public void close() throws IOException {
// do not close the file system
}
- /* (non-Javadoc)
- * @see java.lang.Object#toString()
- */
+ @Override
public String toString() {
return this.getClass().getName() + "@" + directory;
}
- private class FileSystemIndexInput extends BufferedIndexInput {
+ private class FileSystemIndexInput extends BufferedIndexInput implements Cloneable {
// shared by clones
private class Descriptor {
@@ -253,7 +221,7 @@ public class ReadOnlyFileSystemDirectory
private boolean isClone;
public FileSystemIndexInput(Path path, int ioFileBufferSize)
- throws IOException {
+ throws IOException {
super("FSII_" + path.getName(), ioFileBufferSize);
filePath = path;
descriptor = new Descriptor(path, ioFileBufferSize);
@@ -261,8 +229,9 @@ public class ReadOnlyFileSystemDirectory
isOpen = true;
}
+ @Override
protected void readInternal(byte[] b, int offset, int len)
- throws IOException {
+ throws IOException {
long position = getFilePointer();
if (position != descriptor.position) {
descriptor.in.seek(position);
@@ -279,6 +248,7 @@ public class ReadOnlyFileSystemDirectory
} while (total < len);
}
+ @Override
public void close() throws IOException {
if (!isClone) {
if (isOpen) {
@@ -290,20 +260,24 @@ public class ReadOnlyFileSystemDirectory
}
}
+ @Override
protected void seekInternal(long position) {
// handled in readInternal()
}
+ @Override
public long length() {
return length;
}
+ @Override
protected void finalize() throws IOException {
if (!isClone && isOpen) {
close(); // close the file
}
}
+ @Override
public BufferedIndexInput clone() {
FileSystemIndexInput clone = (FileSystemIndexInput) super.clone();
clone.isClone = true;
@@ -318,17 +292,19 @@ public class ReadOnlyFileSystemDirectory
private boolean isOpen;
public FileSystemIndexOutput(Path path, int ioFileBufferSize)
- throws IOException {
+ throws IOException {
filePath = path;
// overwrite is true by default
out = fs.create(path, true, ioFileBufferSize);
isOpen = true;
}
+ @Override
public void flushBuffer(byte[] b, int offset, int size) throws IOException {
out.write(b, offset, size);
}
+ @Override
public void close() throws IOException {
if (isOpen) {
super.close();
@@ -339,14 +315,17 @@ public class ReadOnlyFileSystemDirectory
}
}
+ @Override
public void seek(long pos) throws IOException {
throw new UnsupportedOperationException();
}
+ @Override
public long length() throws IOException {
return out.getPos();
}
+ @Override
protected void finalize() throws IOException {
if (isOpen) {
close(); // close the file
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java Wed Jun 12 20:44:19 2013
@@ -65,9 +65,11 @@ public class SequenceFilesFromLuceneStor
Configuration configuration = lucene2seqConf.getConfiguration();
FileSystem fileSystem = FileSystem.get(configuration);
Path sequenceFilePath = new Path(lucene2seqConf.getSequenceFilesOutputPath(), indexPath.getName());
- final SequenceFile.Writer sequenceFileWriter = new SequenceFile.Writer(fileSystem, configuration, sequenceFilePath, Text.class, Text.class);
+ final SequenceFile.Writer sequenceFileWriter = new SequenceFile.Writer(fileSystem, configuration,
+ sequenceFilePath, Text.class, Text.class);
- SeqFileWriterCollector writerCollector = new SeqFileWriterCollector(lucene2seqConf, sequenceFileWriter, processedDocs);
+ SeqFileWriterCollector writerCollector = new SeqFileWriterCollector(lucene2seqConf, sequenceFileWriter,
+ processedDocs);
searcher.search(lucene2seqConf.getQuery(), writerCollector);
log.info("Wrote " + writerCollector.processedDocs + " documents in " + sequenceFilePath.toUri());
processedDocs = writerCollector.processedDocs;
@@ -84,7 +86,8 @@ public class SequenceFilesFromLuceneStor
public int processedDocs;
AtomicReaderContext arc;
- SeqFileWriterCollector(LuceneStorageConfiguration lucene2seqConf, SequenceFile.Writer sequenceFileWriter, int processedDocs) {
+ SeqFileWriterCollector(LuceneStorageConfiguration lucene2seqConf, SequenceFile.Writer sequenceFileWriter,
+ int processedDocs) {
this.lucene2seqConf = lucene2seqConf;
this.sequenceFileWriter = sequenceFileWriter;
this.processedDocs = processedDocs;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java Wed Jun 12 20:44:19 2013
@@ -17,6 +17,7 @@ package org.apache.mahout.text;
*/
+import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
@@ -75,12 +76,9 @@ public class SequenceFilesFromLuceneStor
}
Configuration configuration = getConf();
- if (configuration == null) {
- configuration = new Configuration();
- }
String[] paths = getInputPath().toString().split(",");
- List<Path> indexPaths = new ArrayList<Path>();
+ List<Path> indexPaths = Lists.newArrayList();
for (String path : paths) {
indexPaths.add(new Path(path));
}
@@ -100,7 +98,8 @@ public class SequenceFilesFromLuceneStor
if (hasOption(OPTION_QUERY)) {
try {
String queryString = COMPILE.matcher(getOption(OPTION_QUERY)).replaceAll("");
- QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryString, new StandardAnalyzer(Version.LUCENE_43));
+ QueryParser queryParser = new QueryParser(Version.LUCENE_43, queryString,
+ new StandardAnalyzer(Version.LUCENE_43));
query = queryParser.parse(queryString);
} catch (ParseException e) {
throw new IllegalArgumentException(e.getMessage(), e);
@@ -115,7 +114,8 @@ public class SequenceFilesFromLuceneStor
}
lucene2SeqConf.setMaxHits(maxHits);
- if (hasOption(DefaultOptionCreator.METHOD_OPTION) && getOption(DefaultOptionCreator.METHOD_OPTION).equals("sequential")) {
+ if (hasOption(DefaultOptionCreator.METHOD_OPTION)
+ && getOption(DefaultOptionCreator.METHOD_OPTION).equals("sequential")) {
new SequenceFilesFromLuceneStorage().run(lucene2SeqConf);
} else {
new SequenceFilesFromLuceneStorageMRJob().run(lucene2SeqConf);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java Wed Jun 12 20:44:19 2013
@@ -27,8 +27,8 @@ import org.apache.hadoop.mapreduce.lib.o
import java.io.IOException;
/**
- * Generates a sequence file from a Lucene index via MapReduce. Uses a specified id field as the key and a content field as the value.
- * Configure this class with a {@link LuceneStorageConfiguration} bean.
+ * Generates a sequence file from a Lucene index via MapReduce. Uses a specified id field as the key and a content field
+ * as the value. Configure this class with a {@link LuceneStorageConfiguration} bean.
*/
public class SequenceFilesFromLuceneStorageMRJob {
@@ -36,7 +36,8 @@ public class SequenceFilesFromLuceneStor
try {
Configuration configuration = lucene2seqConf.serialize();
- Job job = new Job(configuration, "LuceneIndexToSequenceFiles: " + lucene2seqConf.getIndexPaths() + " -> M/R -> " + lucene2seqConf.getSequenceFilesOutputPath());
+ Job job = new Job(configuration, "LuceneIndexToSequenceFiles: " + lucene2seqConf.getIndexPaths() + " -> M/R -> "
+ + lucene2seqConf.getSequenceFilesOutputPath());
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java Wed Jun 12 20:44:19 2013
@@ -21,7 +21,7 @@ import static org.apache.commons.lang.St
*/
public class SequenceFilesFromLuceneStorageMapper extends Mapper<Text, NullWritable, Text, Text> {
- public enum DataStatus {EMPTY_KEY, EMPTY_VALUE, EMPTY_BOTH}
+ public enum DataStatus { EMPTY_KEY, EMPTY_VALUE, EMPTY_BOTH }
private LuceneStorageConfiguration l2sConf;
private SegmentReader segmentReader;
@@ -50,9 +50,9 @@ public class SequenceFilesFromLuceneStor
context.getCounter(DataStatus.EMPTY_BOTH).increment(1);
return;
}
- if (isBlank(theKey.toString())){
+ if (isBlank(theKey.toString())) {
context.getCounter(DataStatus.EMPTY_KEY).increment(1);
- } else if (isBlank(theValue.toString())){
+ } else if (isBlank(theValue.toString())) {
context.getCounter(DataStatus.EMPTY_VALUE).increment(1);
}
context.write(theKey, theValue);
@@ -62,4 +62,4 @@ public class SequenceFilesFromLuceneStor
protected void cleanup(Context context) throws IOException, InterruptedException {
segmentReader.close();
}
-}
\ No newline at end of file
+}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsJob.java Wed Jun 12 20:44:19 2013
@@ -54,8 +54,6 @@ public class ConcatenateVectorsJob exten
static final String MATRIXA_DIMS = "mahout.concatenatevectors.matrixA_dims";
static final String MATRIXB_DIMS = "mahout.concatenatevectors.matrixB_dims";
-
- private static final Logger LOG = LoggerFactory.getLogger(ConcatenateVectorsJob.class);
private ConcatenateVectorsJob() {}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/ConcatenateVectorsReducer.java Wed Jun 12 20:44:19 2013
@@ -77,8 +77,9 @@ public class ConcatenateVectorsReducer e
if (vOut == null) {
vOut = new SequentialAccessSparseVector(dimsA + dimsB);
- if (isNamed)
+ if (isNamed) {
vOut = new NamedVector(vOut, name);
+ }
}
if (vA != null) {
@@ -92,7 +93,8 @@ public class ConcatenateVectorsReducer e
}
private void appendVector(Vector vOut, Vector vIn, int offset) {
- for (Vector.Element element : vIn.nonZeroes())
+ for (Vector.Element element : vIn.nonZeroes()) {
vOut.set(element.index() + offset, element.get());
+ }
}
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/SplitInput.java Wed Jun 12 20:44:19 2013
@@ -277,16 +277,14 @@ public class SplitInput extends Abstract
*/
public void splitDirectory(Path inputDir) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = getConf();
- if (conf == null) {
- conf = new Configuration();
- }
splitDirectory(conf, inputDir);
}
/*
* See also splitDirectory(Path inputDir)
* */
- public void splitDirectory(Configuration conf, Path inputDir) throws IOException, ClassNotFoundException, InterruptedException {
+ public void splitDirectory(Configuration conf, Path inputDir)
+ throws IOException, ClassNotFoundException, InterruptedException {
FileSystem fs = inputDir.getFileSystem(conf);
if (fs.getFileStatus(inputDir) == null) {
throw new IOException(inputDir + " does not exist");
@@ -315,9 +313,6 @@ public class SplitInput extends Abstract
*/
public void splitFile(Path inputFile) throws IOException {
Configuration conf = getConf();
- if (conf == null) {
- conf = new Configuration();
- }
FileSystem fs = inputFile.getFileSystem(conf);
if (fs.getFileStatus(inputFile) == null) {
throw new IOException(inputFile + " does not exist");
@@ -655,9 +650,6 @@ public class SplitInput extends Abstract
if (!useMapRed) {
Configuration conf = getConf();
- if (conf == null) {
- conf = new Configuration();
- }
FileSystem fs = trainingOutputDirectory.getFileSystem(conf);
FileStatus trainingOutputDirStatus = fs.getFileStatus(trainingOutputDirectory);
Preconditions.checkArgument(trainingOutputDirStatus != null && trainingOutputDirStatus.isDir(),
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Wed Jun 12 20:44:19 2013
@@ -88,7 +88,7 @@ public abstract class AbstractClusterWri
}
});
- Collection<Pair<String, Double>> topTerms = new LinkedList<Pair<String, Double>>();
+ Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
int index = vectorTerms.get(i).index;
@@ -128,7 +128,7 @@ public abstract class AbstractClusterWri
}
});
- Collection<Pair<String, Double>> topTerms = new LinkedList<Pair<String, Double>>();
+ Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
int index = vectorTerms.get(i).index;
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/email/MailProcessor.java Wed Jun 12 20:44:19 2013
@@ -33,9 +33,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
- * Converts an mbox mail archive into a group of Hadoop Sequence Files with equal size. The archive may optionally be gzipped or zipped.
- * @see org.apache.mahout.text.SequenceFilesFromMailArchives
- *
+ * Converts an mbox mail archive into a group of Hadoop Sequence Files with equal size. The archive may optionally be
+ * gzipped or zipped. @see org.apache.mahout.text.SequenceFilesFromMailArchives
*/
public class MailProcessor {
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java Wed Jun 12 20:44:19 2013
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.standa
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import org.apache.mahout.common.lucene.TokenStreamIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.StringReader;
@@ -33,6 +35,8 @@ public class AnalyzerTransformer impleme
private Analyzer analyzer;
private String fieldName = "text";
+ private static final Logger log = LoggerFactory.getLogger(AnalyzerTransformer.class);
+
public AnalyzerTransformer() {
this(new StandardAnalyzer(Version.LUCENE_43), "text");
}
@@ -65,7 +69,7 @@ public class AnalyzerTransformer impleme
try {
Closeables.close(ts, true);
} catch (IOException e) {
- //nothing
+ log.error(e.getMessage(), e);
}
}
return result.toString();
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/arff/Driver.java Wed Jun 12 20:44:19 2013
@@ -171,7 +171,7 @@ public final class Driver {
// Turn the map of labels into a list order by order of appearance
List<Entry<String, Integer>> attributes = Lists.newArrayList();
attributes.addAll(arffModel.getLabelBindings().entrySet());
- Collections.sort(attributes, new Comparator<Map.Entry<String, Integer>>(){
+ Collections.sort(attributes, new Comparator<Map.Entry<String, Integer>>() {
@Override
public int compare(Entry<String, Integer> t, Entry<String, Integer> t1) {
return t.getValue().compareTo(t1.getValue());
@@ -219,13 +219,14 @@ public final class Driver {
Map<String, Map<String, Integer>> nominalMap = arffModel.getNominalMap();
// how many nominal attributes
writer.write(String.valueOf(nominalMap.size()) + "\n");
- for (String attribute : nominalMap.keySet()) {
+
+ for (Entry<String, Map<String, Integer>> entry : nominalMap.entrySet()) {
// the label of this attribute
- writer.write(attribute + "\n");
- Set<Entry<String, Integer>> attributeValues = nominalMap.get(attribute).entrySet();
+ writer.write(entry.getKey() + "\n");
+ Set<Entry<String, Integer>> attributeValues = entry.getValue().entrySet();
// how many values does this attribute have
writer.write(attributeValues.size() + "\n");
- for (Map.Entry<String, Integer> value : nominalMap.get(attribute).entrySet()) {
+ for (Map.Entry<String, Integer> value : attributeValues) {
// the value and the value index
writer.write(String.format("%s%s%s\n", value.getKey(), delimiter, value.getValue().toString()));
}
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/vectors/lucene/ClusterLabels.java Wed Jun 12 20:44:19 2013
@@ -31,6 +31,7 @@ import java.util.Set;
import java.util.TreeSet;
import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import com.google.common.io.Files;
@@ -214,7 +215,7 @@ public class ClusterLabels {
}
- List<TermInfoClusterInOut> clusteredTermInfo = new LinkedList<TermInfoClusterInOut>();
+ List<TermInfoClusterInOut> clusteredTermInfo = Lists.newLinkedList();
int clusterSize = wvws.size();
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java Wed Jun 12 20:44:19 2013
@@ -16,6 +16,7 @@
*/
package org.apache.mahout.text;
+import com.google.common.collect.Lists;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -43,8 +44,8 @@ public abstract class AbstractLuceneStor
protected Path indexPath1;
protected Path indexPath2;
- protected List<SingleFieldDocument> docs = new ArrayList<SingleFieldDocument>();
- protected List<SingleFieldDocument> misshapenDocs = new ArrayList<SingleFieldDocument>();
+ protected List<SingleFieldDocument> docs = Lists.newArrayList();
+ protected List<SingleFieldDocument> misshapenDocs = Lists.newArrayList();
@Override
public void setUp() throws Exception {
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java Wed Jun 12 20:44:19 2013
@@ -62,7 +62,7 @@ public class LuceneSegmentInputSplitTest
}
@Test(expected = IllegalArgumentException.class)
- public void testGetSegment_nonExistingSegment() throws Exception {
+ public void testGetSegmentNonExistingSegment() throws Exception {
SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple document 1");
SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple document 2");
SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple document 3");
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java Wed Jun 12 20:44:19 2013
@@ -42,7 +42,7 @@ public class LuceneStorageConfigurationT
}
@Test(expected = IllegalArgumentException.class)
- public void testSerialization_notSerialized() throws IOException {
+ public void testSerializationNotSerialized() throws IOException {
new LuceneStorageConfiguration(new Configuration());
}
}
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriverTest.java Wed Jun 12 20:44:19 2013
@@ -121,7 +121,7 @@ public class SequenceFilesFromLuceneStor
}
@Test
- public void testRun_optionalArguments() throws Exception {
+ public void testRunOptionalArguments() throws Exception {
String[] args = {
"-i", getIndexPath1AsFile().toString(),
"-o", seqFilesOutputPath.toString(),
@@ -144,7 +144,7 @@ public class SequenceFilesFromLuceneStor
}
@Test
- public void testRun_invalidQuery() throws Exception {
+ public void testRunInvalidQuery() throws Exception {
String[] args = {
"-i", getIndexPath1AsFile().toString(),
"-o", seqFilesOutputPath.toString(),
Modified: mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java?rev=1492416&r1=1492415&r2=1492416&view=diff
==============================================================================
--- mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java (original)
+++ mahout/trunk/integration/src/test/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageTest.java Wed Jun 12 20:44:19 2013
@@ -100,7 +100,7 @@ public class SequenceFilesFromLuceneStor
@SuppressWarnings("unchecked")
@Test
- public void testRun_skipUnstoredFields() throws IOException {
+ public void testRunSkipUnstoredFields() throws IOException {
commitDocuments(getDirectory(getIndexPath1AsFile()), new UnstoredFieldsDocument("5", "This is test document 5"));
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
@@ -119,7 +119,7 @@ public class SequenceFilesFromLuceneStor
@SuppressWarnings("unchecked")
@Test
- public void testRun_maxHits() throws IOException {
+ public void testRunMaxHits() throws IOException {
commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(1000, 1500));
@@ -141,7 +141,7 @@ public class SequenceFilesFromLuceneStor
@SuppressWarnings("unchecked")
@Test
- public void testRun_query() throws IOException {
+ public void testRunQuery() throws IOException {
commitDocuments(getDirectory(getIndexPath1AsFile()), docs);
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
asList(getIndexPath1()),
@@ -162,7 +162,7 @@ public class SequenceFilesFromLuceneStor
}
@Test
- public void testRun_multipleFields() throws IOException {
+ public void testRunMultipleFields() throws IOException {
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
asList(getIndexPath1()),
seqFilesOutputPath,
@@ -184,7 +184,7 @@ public class SequenceFilesFromLuceneStor
}
@Test
- public void testRun_numericField() throws IOException {
+ public void testRunNumericField() throws IOException {
LuceneStorageConfiguration lucene2SeqConf = new LuceneStorageConfiguration(configuration,
asList(getIndexPath1()),
seqFilesOutputPath,